There are three ways to generate this list: Amazon's Alexa API, the Compete API, and scraping Quantcast. Each gives different results. Many sites are directly measured by Quantcast, which makes its numbers much more reliable than those from either Alexa or Compete.
Amazon
Here is the code we use to generate the list. It uses Amazon's commercial AWIS API -- you must have an account and enter your access keys at the top to run this script!
If you want the 3-month average, use the 19th script block instead of the 17th (line 14)
Updated 3/29/08 - better error handling for sites that drop out of the rankings.
Updated 07/09/08 - Only showing the top 100. Also, better formatting of results so you can quickly and easily update the top100 page in source mode.
#!/usr/bin/env ruby
require 'rubygems'
require 'hpricot'
require 'open-uri'
require "cgi"
require "base64"
require "openssl"
require "digest/sha1"
require "uri"
require "net/https"
require "xmlsimple"
require "time"
# AWIS credentials -- fill both in before running the script.
ACCESS_KEY_ID = ""
SECRET_ACCESS_KEY = ""

# Request parameters shared by every lookup below.
action = "UrlInfo"
responseGroup = "Rank"
timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z")

# The signature is Base64(HMAC-SHA1(secret key, action + timestamp)).
# OpenSSL::Digest::SHA1 is used instead of OpenSSL::Digest::Digest: the latter
# was only a deprecated alias and is no longer defined by current openssl
# releases, where referencing it raises NameError.
hmac = OpenSSL::HMAC.digest(OpenSSL::Digest::SHA1.new, SECRET_ACCESS_KEY, action + timestamp)
signature = Base64.encode64(hmac).strip
# Download the Rails 100 wiki page and collect the href of every link in its
# ordered list. URI.open is used because Kernel#open no longer opens URLs on
# Ruby 3.0+; the block form closes the connection once the body has been read.
rails100 = Hpricot(URI.open("http://rails100.pbwiki.com/Alexa+Rankings") { |page| page.read })
uris = rails100.search("#wikipage-inner ol li a").map do |link|
  link.attributes["href"]
end.compact.reject { |uri| uri =~ /alexa/ }.uniq
# compact: anchors without an href would otherwise put nil into the list.
# The reject drops the alexa.com links that sit next to each site link.
# uncomment this line to test changes without running up your AWIS bill
# uris = "http://twitter.com", "http://altrulist.org.uk"
# Query AWIS once per site and record its rank, keyed by the URL with the
# "http://" prefix removed. Sites whose lookup fails for any reason get a
# placeholder whose to_i value (999999999) sorts them to the end of the list.
# The result is an array of [site, rank] pairs ordered by ascending rank.
top100 = uris.each_with_object({}) do |url, list|
  begin
    params = [
      ["Action", action],
      ["AWSAccessKeyId", ACCESS_KEY_ID],
      ["Signature", signature],
      ["Timestamp", timestamp],
      ["ResponseGroup", responseGroup],
      ["Url", url]
    ]
    # Put key value pairs into http GET format
    query = params.map { |key, value| key + "=" + CGI::escape(value) }.join("&")
    sent = URI.parse("http://awis.amazonaws.com?" + query)
    xml = XmlSimple.xml_in(Net::HTTP.get(sent))
    traffic = xml["Response"].first["UrlInfoResult"].first["Alexa"].first["TrafficData"].first
    rank = traffic["Rank"].first.to_i
    puts "processed: #{url} [#{rank}]"
    list[url.gsub("http://","")] = rank
  rescue StandardError
    puts "error processing: #{url}"
    list[url.gsub("http://","")] = "999999999 - no data"
  end
end.sort_by { |_site, rank| rank.to_i }
# Emit the 100 best-ranked sites as HTML list items: each links to the site
# itself and, in brackets, to its Alexa page with the rank as the link text.
top100.first(100).each { |url, rank|
  puts "<li><a href='http://#{url}'>#{url}</a> [<a href='http://www.alexa.com/siteinfo/#{url}'>#{rank}</a>]</li>"
}
Compete
You can get a key at http://developer.compete.com
#!/usr/bin/env ruby
require 'rubygems'
require 'curb'
require 'active_support'
require 'pp'
require 'hpricot'
require 'uri'
# Minimal client for the Compete rank API.
class Compete
  COMPETE_API_KEY = " YOU NEED COMPETE API KEY. See URL Above"

  # Returned when the API response holds no usable rank value. It is large so
  # that such sites end up last when results are sorted by rank.
  NO_DATA_RANK = 99999999

  # Looks up the Compete rank of a site.
  #
  # url - host name, optionally prefixed with http:// or https://. The string
  #       passed in is not modified.
  #
  # Returns the rank value from the response as a String with commas removed,
  # or NO_DATA_RANK (an Integer) when the response does not contain one.
  def self.rank(url)
    # The original pattern /http:/// did not parse (its backslashes were lost),
    # and gsub! altered the caller's string; sub on a copy avoids both.
    domain = url.sub(%r{\Ahttps?://}i, '')
    request = %Q{http://api.compete.com/fast-cgi/MI?d=#{domain}&ver=3&apikey=#{COMPETE_API_KEY}&size=large}
    html = Curl::Easy.perform(request).body_str
    results = Hash.from_xml(html)
    begin
      results["ci"]["dmn"]["rank"]["val"].gsub(",", '')
    rescue StandardError
      # Any missing level of the response lands here (nil has no [] / gsub).
      NO_DATA_RANK
    end
  end
end
# Scrapes the list of candidate sites from the Rails 100 wiki.
class PBWiki
  # Hosts of ranking services rather than ranked sites. The list printed by
  # this script links every entry to siteanalytics.compete.com.
  # NOTE(review): assumes that printed list is what gets pasted back into the
  # wiki page scraped here -- confirm against the live page.
  RANKING_SERVICE_HOSTS = /alexa|siteanalytics\.compete\.com/

  # Returns an Array of unique host names (Strings) taken from the links in
  # the ordered list of the "Compete+Rankings" page. Links without a host
  # (relative, empty or unparsable hrefs) and links to ranking services are
  # left out.
  def self.fetch_urls
    # uncomment this line to test changes against a short fixed list instead of scraping the wiki
    #return "http://twitter.com", "http://altrulist.org.uk"
    rails100 = Hpricot(Curl::Easy.perform("http://rails100.pbwiki.com/Compete+Rankings").body_str)
    hosts = rails100.search("#displaycontent ol li a").map do |link|
      begin
        # to_s: an anchor without an href yields "" here, whose host is nil.
        URI.parse(link.attributes["href"].to_s).host
      rescue URI::InvalidURIError
        nil
      end
    end
    # compact first: a nil host would otherwise reach Compete.rank and crash it.
    hosts.compact.reject { |host| host =~ RANKING_SERVICE_HOSTS }.uniq
  end
end
# Look up the Compete rank of every scraped site, then print the sites in
# ascending rank order as wiki-markup list lines.
urls = PBWiki.fetch_urls
top100 = urls.each_with_object({}) do |site, ranks|
  site_rank = Compete.rank(site)
  sleep 1 #They have rate-limiting.. 1/s is safe
  puts "processed: #{site} [#{site_rank}]"
  ranks[site.gsub("http://","")] = site_rank
end.sort_by { |_site, site_rank| site_rank.to_i }

top100.each { |url, rank|
  puts "# [http://#{url} #{url}] [[http://siteanalytics.compete.com/#{url} #{rank}]]"
}
Quantcast
#!/usr/bin/env ruby
require 'open-uri'
require 'hpricot'
# Download the Rails 100 wiki page and collect the href of every link in its
# ordered list. URI.open is used because Kernel#open no longer opens URLs on
# Ruby 3.0+; the block form closes the connection once the body has been read.
rails100 = Hpricot(URI.open("http://rails100.pbwiki.com/Alexa+Rankings") { |page| page.read })
uris = rails100.search("#wikipage-inner ol li a").map do |link|
  link.attributes["href"]
end.compact.reject { |uri| uri =~ /alexa/ }.uniq
# compact: anchors without an href would otherwise put nil into the list.
# The reject drops the alexa.com links that sit next to each site link.
# Fetch each site's Quantcast page and pull the number after "r=" out of its
# "top-sites-N?r=…#" link, which this script treats as the site's rank.
# Produces an array of [host, rank] pairs (rank is a String of digits); sites
# whose page cannot be fetched or contains no such link are left out.
top100 = uris.map do |uri|
  begin
    host = uri.gsub(/^http:\/\//i, '')
    # URI.open: Kernel#open no longer opens URLs on Ruby 3.0+. The block form
    # closes the connection as soon as the body has been read.
    resp = URI.open("http://quantcast.com/#{host}") { |page| page.read }
    match = resp.match(/top-sites-\d+\?r=(\d+)\#/i)
    next unless match # no rank link on the page: skip the site

    rank = match[1]
    puts "#{uri} is ranked #{rank}"
    [host, rank]
  rescue StandardError => e
    # StandardError rather than Exception, so Ctrl-C and exit still stop the
    # script; the reason a site was dropped is reported on stderr.
    warn "skipping #{uri}: #{e.message}"
    nil
  end
end.compact
# Order the [host, rank] pairs by numeric rank and print the best 100 as HTML
# list items: each links to the site and, in brackets, to its Quantcast page.
ranked = top100.sort_by { |_host, rank| rank.to_i }
ranked.first(100).each { |url, rank|
  puts "<li><a href='http://#{url}'>#{url}</a> [<a href='http://quantcast.com/#{url}'>#{rank}</a>]</li>"
}
Comments (0)
You don't have permission to comment on this page.