require 'uri'
require 'net/http'

# Tunable weights used by calculate_rankings.

# Minimum gap between two teams' quality ranks before a result is treated as an upset.
UPSET_COUNT = 20
# Scale applied to the quality differential of an upset result.
UPSET_FACTOR = 1.1
# Divisor applied to the quality differential of a road win.
ROAD_WIN_FACTOR = 1.1
# Weight of the team's own quality rank in the final score.
QUALITY_FACTOR = 1.25
# Weight of the accumulated per-game value in the final score.
RANKING_FACTOR = 0.5
# Weight of the strength-of-schedule term in the final score.
STRENGTH_OF_SCHEDULE_FACTOR = 1.5

# Matches a single HTML tag (non-greedy, may span newlines).
HTML_TAG = /<(.|\n)*?>/.freeze

# Conference suffixes that the generic "(ABC)" pattern in parse_record does not catch.
CONFERENCE_SUFFIXES = [
  ' (Big 12)', ' (Big Ten)', ' (Pac 10)', ' (Big East)', ' (Sun Belt)', ' (Independent)'
].freeze

# Fetches a URL over HTTP, following redirects.
#
# @param uri_str [String] URL to request
# @param limit [Integer] number of redirects that may still be followed
# @return [Net::HTTPResponse] the successful response
# @raise [ArgumentError] when the redirect budget is exhausted
# @raise [StandardError] whatever Net::HTTPResponse#error! raises for a
#   response that is neither a success nor a redirect
def fetch(uri_str, limit = 10)
  raise ArgumentError, 'HTTP redirect too deep' if limit <= 0

  response = Net::HTTP.get_response(URI.parse(uri_str))
  case response
  when Net::HTTPSuccess
    response
  when Net::HTTPRedirection
    # A Location header may be relative, so resolve it against the current URL.
    fetch(URI.join(uri_str, response['location']).to_s, limit - 1)
  else
    response.error!
  end
end

# Parses the HTML chunk for one team into a record hash.
#
# The first two lines and the last line of the chunk are discarded, the next
# line is read as the team name, and every remaining line is read as a
# candidate game row.
#
# @param team_table_html [String] HTML chunk for a single team
# @return [Hash] with keys 'points_scored', 'points_allowed', 'schedule'
#   (array of hashes with 'name', 'away?', 'win?'), 'win', 'loss', 'name',
#   'scoring_offense' and 'scoring_defense'
def parse_record(team_table_html)
  record = {
    'points_scored' => 0,
    'points_allowed' => 0,
    'schedule' => [],
    'win' => 0,
    'loss' => 0
  }

  html_lines = team_table_html.split(/\n/)

  # Remove the lines we don't need (opening of table lines, last line):
  html_lines.shift(2)
  html_lines.pop

  # Parse out the name, dropping markup and the trailing conference label:
  name = html_lines.shift.gsub(HTML_TAG, '').sub(/\s\([A-Z]+\)/, '').chomp
  CONFERENCE_SUFFIXES.each { |suffix| name = name.sub(suffix, '') }
  record['name'] = name

  html_lines.each do |line|
    # NOTE(review): SOURCE had an empty pattern in the second gsub, which puts a
    # tab between every character and can never produce the columns indexed
    # below. Restored to the cell boundary that the align="right" normalisation
    # implies -- confirm against the upstream page markup.
    # Non-bang gsub is deliberate: gsub! returns nil when nothing matched.
    cells = line.gsub(' align="right"', '')
                .gsub('</td><td>', "\t")
                .gsub(HTML_TAG, '')
                .split(/\t/)

    # Column 4 holds the result; anything other than W/L is not a played game.
    result = cells[4]
    next unless %w[W L].include?(result)

    record['points_scored'] += cells[5].to_f
    record['points_allowed'] += cells[6].to_f

    won = result == 'W'
    record[won ? 'win' : 'loss'] += 1

    game = {}
    game['name'] = cells[3].sub(/^\*/, '').chomp
    game['away?'] = (cells[2].chomp == '@')
    game['win?'] = won
    record['schedule'] << game
  end

  # Guard against a team with no parsed games (previously Integer 0 / 0).
  games = record['schedule'].length
  record['scoring_offense'] = games.zero? ? 0.0 : record['points_scored'] / games
  record['scoring_defense'] = games.zero? ? 0.0 : record['points_allowed'] / games
  record
end

# Splits the full schedule page into per-team chunks and parses each one.
#
# @param rankings_html [String] contents of the schedule page
# @return [Hash{String => Hash}] team name => record from parse_record
def parse_html(rankings_html)
  # NOTE(review): the separator pattern is reproduced exactly as found; it may
  # have lost markup the same way the cell separator did -- confirm.
  teams_html = rankings_html.split(/^ /)

  # Everything before the first separator is not a team chunk.
  teams_html.shift

  teams_html.each_with_object({}) do |team_table_html, team_hash|
    record = parse_record(team_table_html)
    team_hash[record['name']] = record
  end
end

# Ranks every team by scoring offense, scoring defense and a combined quality
# score, printing each table and storing the zero-based ranks on the records
# under 'offense_quality', 'defense_quality' and 'quality'.
#
# @param team_hash [Hash{String => Hash}] records from parse_html (mutated)
# @return [Hash{String => Hash}] the same team_hash
def calculate_quality(team_hash)
  total_teams = team_hash.length

  offense_quality_hash = {}
  defense_quality_hash = {}
  team_hash.each_pair do |name, team|
    offense_quality_hash[name] = team['scoring_offense']
    defense_quality_hash[name] = team['scoring_defense']
  end

  # Sort by points scored, from highest to lowest:
  offense_ranked = offense_quality_hash.sort { |a, b| b[1] <=> a[1] }
  # Sort by points allowed, from lowest to highest:
  defense_ranked = defense_quality_hash.sort { |a, b| a[1] <=> b[1] }

  puts 'Offense Quality Rankings:'
  offense_ranked.each_with_index do |(team_name, points), rank|
    team_hash[team_name]['offense_quality'] = rank
    puts "#{rank + 1}. #{team_name} #{points}"
  end
  puts "\n\n"

  quality_hash = {}
  puts 'Defense Quality Rankings:'
  defense_ranked.each_with_index do |(team_name, points), rank|
    team = team_hash[team_name]
    team['defense_quality'] = rank
    puts "#{rank + 1}. #{team_name} #{points}"

    # Different methods of calculating overall quality:
    # Sadly, this gives the best results, but it isn't allowed, as it uses average margin of victory:
    # quality_hash[team_name] = team['scoring_offense'] - team['scoring_defense']
    # quality_hash[team_name] = (team['defense_quality'] + team['offense_quality']) / 2
    # Integer average of the two inverted ranks; higher is better.
    quality_hash[team_name] =
      ((total_teams - team['offense_quality']) + (total_teams - team['defense_quality'])) / 2
  end
  puts "\n\n"

  puts 'Overall Quality Rankings:'
  quality_ranked = quality_hash.sort { |a, b| b[1] <=> a[1] }
  previous_score = nil
  previous_rank = nil
  quality_ranked.each_with_index do |(team_name, score), index|
    # Teams with equal scores share the rank of the first team in the run, so
    # three or more tied teams all get the same rank.
    rank = score == previous_score ? previous_rank : index
    team_hash[team_name]['quality'] = rank
    previous_score = score
    previous_rank = rank
    puts "#{rank + 1}. #{team_name} #{score}"
  end
  puts "\n\n"

  team_hash
end

# Computes the final score for every team and returns the teams ordered from
# highest to lowest score. Prints a per-team, per-game breakdown as it goes.
#
# @param team_hash [Hash{String => Hash}] records already processed by
#   calculate_quality (each needs 'quality', 'win', 'loss' and 'schedule')
# @return [Array<Array(String, Float)>] [team name, score] pairs, best first
def calculate_rankings(team_hash)
  rankings_hash = {}
  total_teams = team_hash.length

  team_hash.each_pair do |name, team|
    # Formula:
    #   For a win:  value is increased by the inverse quality differential
    #   For a loss: value is decreased by the inverse quality differential
    strength_of_schedule = 0
    ranking = 0

    # Experimental feature:
    # In addition to calculating raw quality differential, we weight it based on how far
    # the team's quality ranking is from the absolute max possible score. This way, if you
    # beat a bunch of teams that are of similar rank (small quality difference), but your
    # quality is actually really low, we can correct for it. This allows for teams like
    # Temple, who are 7-2 against bad teams, to be ranked lower than they were under the
    # previous ranking rules.
    percent_deviation = 1.0 - (team['quality'].to_f / total_teams.to_f)

    puts "#{name} #{team['win']}-#{team['loss']} (#{team['quality']})"

    team['schedule'].each do |game|
      won = game['win?']

      # If a team plays an FCS squad (an opponent with no record of its own),
      # we rank that opponent as the last, in terms of quality.
      fcs = !team_hash.key?(game['name'])
      opponent_quality = fcs ? total_teams : team_hash[game['name']]['quality']

      strength_of_schedule += opponent_quality
      quality_differential = (team['quality'] - opponent_quality).abs

      if fcs
        # Punish teams mercilessly for playing an FCS squad:
        quality_differential = total_teams
      elsif opponent_quality < team['quality']
        # Upset win over an opponent with a better (lower) quality rank:
        quality_differential /= UPSET_FACTOR if won && quality_differential > UPSET_COUNT
      elsif !won && quality_differential > UPSET_COUNT
        # Upset loss to an opponent with an equal or worse quality rank:
        quality_differential *= UPSET_FACTOR
      end

      # Give a boost for a road win:
      quality_differential /= ROAD_WIN_FACTOR if game['away?'] && won

      # Invert, so that a small differential produces a large game value.
      quality_differential = (total_teams - quality_differential).abs
      quality_differential *= percent_deviation

      if won
        ranking += quality_differential
      else
        ranking -= quality_differential
      end

      puts "\t#{game['name']}(#{opponent_quality}) #{quality_differential} (#{ranking})"
    end

    games_played = team['win'] + team['loss']
    if games_played.zero?
      # No results: the win percentage is treated as zero, which zeroes the
      # score. This also avoids a NaN that would break the final sort.
      rankings_hash[name] = 0.0
      next
    end

    strength_of_schedule = total_teams - (strength_of_schedule.to_f / games_played)

    # Add strength of schedule, ranking, & quality of team. Multiply by win percentage:
    rankings_hash[name] =
      ((strength_of_schedule * STRENGTH_OF_SCHEDULE_FACTOR) +
       (ranking * RANKING_FACTOR) +
       ((total_teams - team['quality']) * QUALITY_FACTOR)) *
      (team['win'].to_f / games_played)
  end

  rankings_hash.sort { |a, b| b[1] <=> a[1] }
end

# Script entry point: use a local copy of the schedule page when one exists,
# otherwise download it, then print the final rankings.
rankings_html =
  if File.exist?('Sked2009.htm')
    File.read('Sked2009.htm')
  else
    fetch('http://www.jhowell.net/cf/scores/Sked2009.htm').body
  end

@team_hash = parse_html(rankings_html)
@team_hash = calculate_quality(@team_hash)
@rankings_array = calculate_rankings(@team_hash)

puts "\n\n"
@rankings_array.each_with_index do |(name, score), i|
  team = @team_hash[name]
  puts "#{i + 1}. #{name} #{team['win']}-#{team['loss']} (#{score})"
end