# Tunable weights for the ranking formula in calculate_rankings.
# A result only counts as an upset when the quality-rank gap between the two
# teams is strictly greater than this value:
UPSET_COUNT = 20
# Divides the quality differential for an upset win and multiplies it for an
# upset loss:
UPSET_FACTOR = 1.1
# Divides the quality differential for any win in a game flagged as away:
ROAD_WIN_FACTOR = 1.1
# Weight applied to the team's own quality term in the final score:
QUALITY_FACTOR = 1.25
# Weight applied to the accumulated per-game ranking value in the final score:
RANKING_FACTOR = 0.5
# Weight applied to the strength-of-schedule term in the final score:
STRENGTH_OF_SCHEDULE_FACTOR = 1.5
# Fetches a URL over HTTP, following redirects.
#
# uri_str - String URL to request.
# limit   - Maximum number of requests (the initial one plus redirects) that
#           may still be made; defaults to 10.
#
# Returns the Net::HTTPSuccess response.
# Raises ArgumentError once the redirect budget is used up (or if a
# non-positive limit is passed in), and whatever Net::HTTPResponse#error!
# raises for any response that is neither a success nor a redirect.
def fetch(uri_str, limit = 10)
  require 'uri'
  require 'net/http'
  # "<=" rather than "==" so a negative limit cannot recurse without bound:
  raise ArgumentError, 'HTTP redirect too deep' if limit <= 0

  response = Net::HTTP.get_response(URI.parse(uri_str))
  case response
  when Net::HTTPSuccess
    response
  when Net::HTTPRedirection
    # A Location header may be relative; resolve it against the URL that was
    # just requested before following it.
    fetch(URI.join(uri_str, response['location']).to_s, limit - 1)
  else
    response.error!
  end
end
# Parses the HTML chunk for a single team into a record hash.
#
# team_table_html - String holding one team's table. The first two lines and
#                   the last line are discarded, the next line is read as the
#                   team name, and every remaining line is treated as a
#                   tab-separated game row.
#
# Returns a Hash with string keys:
#   'name'             - team name with markup and conference suffix removed
#   'win', 'loss'      - number of rows whose result column is "W" / "L"
#   'points_scored', 'points_allowed' - totals over the counted rows
#   'schedule'         - Array of { 'name', 'away?', 'win?' } hashes, one per
#                        counted row
#   'scoring_offense', 'scoring_defense' - per-game averages; 0.0 when no row
#                        was counted
def parse_record(team_table_html)
  record = {
    'points_scored' => 0,
    'points_allowed' => 0,
    'schedule' => [],
    'win' => 0,
    'loss' => 0
  }
  html_lines = team_table_html.split(/\n/)
  # Remove the lines we don't need (two leading lines, last line):
  html_lines.shift(2)
  html_lines.pop
  # Parse out the name, stripping tags and a trailing "(CONF)" marker:
  name = html_lines.shift.gsub(/<(.|\n)*?>/, '').sub(/\s\([A-Z]+\)/, '').chomp
  # Conference names the all-caps pattern above cannot match:
  [' (Big 12)', ' (Big Ten)', ' (Pac 10)', ' (Big East)', ' (Sun Belt)', ' (Independent)'].each do |suffix|
    name = name.sub(suffix, '')
  end
  record['name'] = name

  html_lines.each do |raw_line|
    # Non-bang gsub throughout: gsub! returns nil when nothing was replaced,
    # which would break the chain on a row that contains no tags.
    # NOTE(review): the rows come from splitting on "\n", so the newline-to-tab
    # substitution can never match here; its first argument may originally
    # have been a cell-delimiter tag -- confirm against the upstream data.
    line = raw_line.gsub(' align="right"', '').gsub("\n", "\t").gsub(/<(.|\n)*?>/, '')
    fields = line.split(/\t/)
    # Only rows with a recorded result are counted:
    next unless %w[W L].include?(fields[4])

    record['points_scored'] += fields[5].to_f
    record['points_allowed'] += fields[6].to_f
    won = fields[4] == 'W'
    record[won ? 'win' : 'loss'] += 1
    record['schedule'].push(
      'name' => fields[3].sub(/^\*/, '').chomp,
      'away?' => (fields[2].chomp == '@'),
      'win?' => won
    )
  end

  # Guard the averages so a team with no counted games does not divide by zero:
  games_played = record['schedule'].length
  if games_played.zero?
    record['scoring_offense'] = 0.0
    record['scoring_defense'] = 0.0
  else
    record['scoring_offense'] = record['points_scored'] / games_played
    record['scoring_defense'] = record['points_allowed'] / games_played
  end
  record
end
# Breaks the full schedule page into per-team chunks and parses each one.
#
# rankings_html - String with the whole page. It is cut at every line that
#                 begins with a space; the text before the first cut is
#                 discarded and each remaining chunk goes to parse_record.
#
# Returns a Hash of team name => record Hash as produced by parse_record.
def parse_html(rankings_html)
  team_chunks = rankings_html.split(/^ /).drop(1)
  team_chunks.each_with_object({}) do |chunk, teams|
    entry = parse_record(chunk)
    teams[entry['name']] = entry
  end
end
# Ranks every team by scoring offense, by scoring defense, and by a combined
# quality score, printing each ranking table as it goes.
#
# team_hash - Hash of team name => record Hash; each record must provide
#             'scoring_offense' and 'scoring_defense'.
#
# Writes zero-based 'offense_quality', 'defense_quality' and 'quality' ranks
# (0 is best) into each record. Teams with an equal combined quality score all
# share the rank of the first team in the tie.
#
# Returns the same team_hash.
def calculate_quality(team_hash)
  total_teams = team_hash.length
  offense_pairs = team_hash.map { |name, team| [name, team['scoring_offense']] }
  defense_pairs = team_hash.map { |name, team| [name, team['scoring_defense']] }
  # Sort by points scored, from highest to lowest:
  offense_quality_array = offense_pairs.sort { |a, b| b[1] <=> a[1] }
  # Sort by points allowed, from lowest to highest:
  defense_quality_array = defense_pairs.sort { |a, b| a[1] <=> b[1] }

  puts "Offense Quality Rankings:"
  offense_quality_array.each_with_index do |(team_name, points), i|
    team_hash[team_name]['offense_quality'] = i
    puts "#{i + 1}. #{team_name} #{points}"
  end
  puts "\n\n"

  quality_hash = {}
  puts "Defense Quality Rankings:"
  defense_quality_array.each_with_index do |(team_name, points), i|
    team = team_hash[team_name]
    team['defense_quality'] = i
    puts "#{i + 1}. #{team_name} #{points}"
    # Overall quality is the (integer) mean of the two inverted ranks, so a
    # larger number is better. Average margin of victory would be simpler but
    # is deliberately not used.
    quality_hash[team_name] =
      ((total_teams - team['offense_quality']) + (total_teams - team['defense_quality'])) / 2
  end
  puts "\n\n"

  puts "Overall Quality Rankings:"
  quality_array = quality_hash.sort { |a, b| b[1] <=> a[1] }
  previous_value = nil
  previous_rank = nil
  quality_array.each_with_index do |(team_name, team_quality), i|
    # Carry the rank forward through a tie instead of adjusting the loop
    # index, so ties of any size share one rank and the printed line still
    # names the current team.
    rank = team_quality == previous_value ? previous_rank : i
    team_hash[team_name]['quality'] = rank
    previous_value = team_quality
    previous_rank = rank
    puts "#{rank + 1}. #{team_name} #{team_quality}"
  end
  puts "\n\n"
  team_hash
end
# Scores every team from its game results, strength of schedule and quality
# rank, printing a per-game breakdown as it goes.
#
# team_hash - Hash of team name => record Hash; each record must provide
#             'win', 'loss', 'quality' (zero-based rank, 0 is best) and
#             'schedule' (Array of game hashes with 'name', 'away?', 'win?').
#
# Returns an Array of [name, score] pairs sorted from highest score to lowest.
# A team with no completed games scores 0.0.
def calculate_rankings(team_hash)
  rankings_hash = {}
  total_teams = team_hash.length
  team_hash.each_pair do |name, team|
    # Formula:
    # For a win: value is increased by inverse quality differential
    # For a loss: value is decreased by quality differential
    opponent_quality_sum = 0
    ranking = 0
    puts "#{name} #{team['win']}-#{team['loss']} (#{team['quality']})"
    team['schedule'].each do |game|
      # An opponent missing from team_hash is treated as an FCS squad and
      # ranked last in terms of quality:
      fcs = !team_hash.key?(game['name'])
      opponent_quality = fcs ? total_teams : team_hash[game['name']]['quality']
      opponent_quality_sum += opponent_quality
      quality_differential = (team['quality'] - opponent_quality).abs
      if fcs
        # Punish teams mercilessly for playing an FCS squad:
        quality_differential = total_teams
      elsif quality_differential > UPSET_COUNT
        if opponent_quality < team['quality']
          # Upset win over a better-ranked opponent:
          quality_differential /= UPSET_FACTOR if game['win?']
        else
          # Upset loss to an opponent ranked no better than this team:
          quality_differential *= UPSET_FACTOR unless game['win?']
        end
      end
      # Give a boost for a road win:
      quality_differential /= ROAD_WIN_FACTOR if game['away?'] && game['win?']
      quality_differential = (total_teams - quality_differential).abs
      # Experimental feature:
      # Weight the differential by how close the team's own quality rank is to
      # the best possible rank, so that a team beating similarly ranked but
      # poor opponents does not get full credit for those wins.
      percent_deviation = 1.0 - (team['quality'].to_f / total_teams.to_f)
      quality_differential *= percent_deviation
      if game['win?']
        ranking += quality_differential
      else
        ranking -= quality_differential
      end
      puts "\t#{game["name"]}(#{opponent_quality}) #{quality_differential} (#{ranking})"
    end

    games_played = team['win'] + team['loss']
    if games_played.zero?
      # The win percentage would be 0/0 (NaN), and a NaN score makes the
      # final sort raise; a team that has not played simply scores zero.
      rankings_hash[name] = 0.0
      next
    end

    strength_of_schedule = total_teams - (opponent_quality_sum.to_f / games_played)
    # Add strength of schedule, ranking, & quality of team. Multiply by win percentage:
    rankings_hash[name] = ((strength_of_schedule * STRENGTH_OF_SCHEDULE_FACTOR) + (ranking * RANKING_FACTOR) + ((total_teams - team['quality']) * QUALITY_FACTOR)) * (team['win'].to_f / games_played)
  end
  rankings_hash.sort { |a, b| b[1] <=> a[1] }
end
# Script entry point: load the 2009 schedule page (a local copy when present,
# otherwise over HTTP), build the team records, rank them, and print the
# final standings.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
rankings_html =
  if File.exist?('Sked2009.htm')
    File.read('Sked2009.htm')
  else
    fetch('http://www.jhowell.net/cf/scores/Sked2009.htm').body
  end
@team_hash = parse_html(rankings_html)
@team_hash = calculate_quality(@team_hash)
@rankings_array = calculate_rankings(@team_hash)
puts "\n\n"
@rankings_array.each_with_index do |(team_name, score), i|
  record = @team_hash[team_name]
  puts "#{i + 1}. #{team_name} #{record['win']}-#{record['loss']} (#{score})"
end
|