rails100

 

Generate

Page history last edited by Diego 3 mos ago

There are 3 ways to generate this list: Amazon's Alexa API, the Compete API, and scraping Quantcast. Each gives different results.  Many sites are directly measured by Quantcast, making those numbers much more reliable than either Alexa or Compete.

 

 

Amazon

 

Here is the code we use to generate the list. It uses Amazon's commercial AWIS API -- you must have an account and enter your access keys at the top to run this script!

 

If you want the 3-month average, use the 19th script block instead of the 17th (line 14)

 

Updated 3/29/08 - better error handling for sites that drop out of the rankings.

Updated 07/09/08 - Only showing the top 100.  Also, better formatting of results so you can quickly and easily update the top100 page in source mode.

 

#!/usr/bin/env ruby

 

require 'rubygems'

require 'hpricot'

require 'open-uri'

require "cgi"

require "base64"

require "openssl"

require "digest/sha1"

require "uri"

require "net/https"

require "xmlsimple"

require "time"

 

# AWIS credentials. Fill both in before running: the access key id is sent
# with every request and the secret key signs it.
ACCESS_KEY_ID = ""
SECRET_ACCESS_KEY = ""

# AWIS action to call and the response group to request from it.
action = "UrlInfo"
responseGroup = "Rank"

# Current UTC time; it is part of the signed string and is also sent as the
# "Timestamp" query parameter.
timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z")

# Base64-encoded HMAC-SHA1 of action + timestamp, keyed with the secret key.
# OpenSSL::Digest::SHA1 is used instead of OpenSSL::Digest::Digest because
# the latter has been removed from current versions of Ruby's openssl
# library, while the SHA1 class is available in old and new versions alike.
signature = Base64.encode64(
  OpenSSL::HMAC.digest(OpenSSL::Digest::SHA1.new, SECRET_ACCESS_KEY, action + timestamp)
).strip

 

# Download and parse the current rankings page. URI#read (provided by
# open-uri) is used because Kernel#open no longer accepts URLs as of
# Ruby 3.0; it also closes the connection once the body has been read.
rails100 = Hpricot(URI.parse("http://rails100.pbwiki.com/Alexa+Rankings").read)

# Collect the href of every link found inside the page's ordered lists,
# dropping links whose address mentions alexa and any duplicates.
uris = rails100.search("#wikipage-inner ol li a").collect do |link|
  link.attributes["href"]
end.reject { |uri| uri =~ /alexa/ }.uniq

# uncomment this line to test changes without running up your AWIS bill
# uris = "http://twitter.com", "http://altrulist.org.uk"

 

# Ask AWIS for the rank of every URL and remember it under the URL without
# its "http://" prefix. A lookup that fails for any reason is recorded with
# a placeholder whose leading digits sort it to the end of the list.
ranks = {}
uris.each do |url|
  begin
    params = {
      "Action" => action,
      "AWSAccessKeyId" => ACCESS_KEY_ID,
      "Signature" => signature,
      "Timestamp" => timestamp,
      "ResponseGroup" => responseGroup,
      "Url" => url
    }
    # Put key value pairs into http GET format
    query = params.to_a.collect { |pair| pair.first + "=" + CGI::escape(pair.last) }.join("&")
    sent = URI.parse("http://awis.amazonaws.com?" + query)

    xml = XmlSimple.xml_in(Net::HTTP.get(sent))
    traffic = xml["Response"].first["UrlInfoResult"].first["Alexa"].first["TrafficData"].first
    rank = traffic["Rank"].first.to_i
    puts "processed: #{url} [#{rank}]"
    ranks[url.gsub("http://","")] = rank
  rescue
    puts "error processing: #{url}"
    ranks[url.gsub("http://","")] = "999999999 - no data"
  end
end

# Array of [host, rank] pairs ordered by numeric rank.
top100 = ranks.sort_by { |_host, rank| rank.to_i }

 

# Print the first 100 entries of the sorted list as HTML list items, each
# linking to the site and to its Alexa site-info page.
top100.first(100).each do |host, rank|
  puts "<li><a href='http://#{host}'>#{host}</a> [<a href='http://www.alexa.com/siteinfo/#{host}'>#{rank}</a>]</li>"
end

 

Compete

 

You can get a key at http://developer.compete.com

 

 

 


#!/usr/bin/env ruby
require 'rubygems'
require 'curb'
require 'active_support'
require 'pp'
require 'hpricot'
require 'uri'
# Minimal client for the Compete API rank lookup.
class Compete
  COMPETE_API_KEY = " YOU NEED COMPETE API KEY. See URL Above"

  # Value returned when the API response carries no rank for the domain.
  NO_RANK = 99999999

  # Looks up the Compete rank of a site.
  #
  # url - String domain, optionally prefixed with "http://" or "https://".
  #       The argument is not modified, so frozen strings are accepted.
  #
  # Returns the rank as a String with the thousands separators removed, or
  # the Integer 99999999 when the parsed response does not contain a rank.
  def self.rank(url)
    # %r{} avoids escaping the slashes; sub (not gsub!) leaves the caller's
    # string untouched.
    domain = url.sub(%r{\Ahttps?://}i, '')
    endpoint = %Q{http://api.compete.com/fast-cgi/MI?d=#{domain}&ver=3&apikey=#{COMPETE_API_KEY}&size=large}
    html = Curl::Easy.perform(endpoint).body_str
    results = Hash.from_xml(html)
    begin
      results["ci"]["dmn"]["rank"]["val"].gsub(",", '')
    rescue StandardError
      # Any missing level in the response means "no rank available".
      NO_RANK
    end
  end
end

# Scrapes the list of tracked sites from the rails100 wiki.
class PBWiki
  # Fetches the Compete rankings page and returns the host of every link
  # found inside its ordered lists.
  #
  # Links whose host mentions alexa, duplicate hosts, and links whose href
  # is missing, malformed or has no host (e.g. relative links) are left out,
  # so every element of the result is a String.
  #
  # Returns an Array of host name Strings.
  def self.fetch_urls
    # uncomment this line to test changes without using up your Compete API quota
    #return "http://twitter.com", "http://altrulist.org.uk"
    rails100 = Hpricot(Curl::Easy.perform("http://rails100.pbwiki.com/Compete+Rankings").body_str)
    hosts = rails100.search("#displaycontent ol li a").collect do |link|
      begin
        href = link.attributes["href"]
        href && URI.parse(href).host
      rescue URI::InvalidURIError
        # One unparseable link should not abort the whole run.
        nil
      end
    end
    hosts.compact.reject { |host| host =~ /alexa/ }.uniq
  end
end

# Rank every site through the Compete API, one request per second, then
# print the sites ordered by numeric rank in wiki list markup.
urls = PBWiki.fetch_urls
ranks = {}
urls.each do |url|
  rank = Compete.rank(url)
  sleep 1  #They have rate-limiting.. 1/s is safe
  puts "processed: #{url} [#{rank}]"
  ranks[url.gsub("http://","")] = rank
end
top100 = ranks.sort_by { |_site, rank| rank.to_i }

top100.each do |site, rank|
  puts "# [http://#{site} #{site}] [[http://siteanalytics.compete.com/#{site} #{rank}]]"
end

 

Quantcast

 

#!/usr/bin/env ruby

require 'open-uri'

require 'hpricot'

# Download and parse the rankings page. URI#read (provided by open-uri) is
# used because Kernel#open no longer accepts URLs as of Ruby 3.0; it also
# closes the connection once the body has been read.
rails100 = Hpricot(URI.parse("http://rails100.pbwiki.com/Alexa+Rankings").read)

# Collect the href of every link found inside the page's ordered lists,
# dropping links whose address mentions alexa and any duplicates.
uris = rails100.search("#wikipage-inner ol li a").collect do |link|
  link.attributes["href"]
end.reject { |uri| uri =~ /alexa/ }.uniq

# Scrape each site's Quantcast page for its rank and build a list of
# [host, rank] pairs. Sites whose page cannot be fetched, or whose page does
# not contain a rank, are reported on stderr and left out of the list.
top100 = uris.collect do |uri|
  begin
    host = uri.gsub(/^http:\/\//i, '')

    # URI#read (open-uri) replaces Kernel#open, which no longer accepts URLs
    # as of Ruby 3.0, and closes the connection after reading the body.
    page = URI.parse("http://quantcast.com/#{host}").read

    match = page.match(/top-sites-\d+\?r=(\d+)\#/i)
    unless match
      warn "skipping #{uri}: no rank found on page"
      next
    end

    rank = match[1]
    puts "#{uri} is ranked #{rank}"

    [host, rank]
  rescue StandardError => e
    # StandardError rather than Exception, so Ctrl-C and exit still stop
    # the script instead of being swallowed here.
    warn "skipping #{uri}: #{e.message}"
    nil
  end
end.compact

# Order the scraped pairs by numeric rank and print the first 100 as HTML
# list items, each linking to the site and to its Quantcast page.
ranked = top100.sort_by { |_host, rank| rank.to_i }
ranked.first(100).each do |host, rank|
  puts "<li><a href='http://#{host}'>#{host}</a> [<a href='http://quantcast.com/#{host}'>#{rank}</a>]</li>"
end

 

Comments (0)

You don't have permission to comment on this page.