# encoding: utf-8
#
# Counts Twitter user-search results per city and prints a summary.
#
# For every line of villesfrance.txt the script queries the user search twice:
# once with the city name as location, once with "+france" appended. It then
# prints one "city:count:count_with_france" line per city (largest count
# first) followed by aggregate totals.
#
# The encoding comment above is needed on Ruby 1.9 because of the non-ASCII
# string literal below; later Rubies default to UTF-8 source.
require 'net/http'
require 'cgi'
require 'rubygems'
require 'progressbar'

urlsearch = 'http://twitter.com/tw/search/users?q=location%3A'
villes = IO.readlines('villesfrance.txt')
# Small sample list, handy for a quick manual run:
#villes = ['bordeaux', 'paris', 'nantes']
tabvilles = []

# Truncate the error file. Nothing below writes to it; errors go to $stderr.
# NOTE(review): presumably stderr is redirected to this file by the caller - confirm.
File.open("twitterFrance.err", "w") do |errfile|
  errfile.puts("")
end

# Fetches a search page and extracts the total number of results.
#
# urlville - String URL of the search page to fetch.
# tries    - Integer maximum number of HTTP attempts (default 5).
#
# Returns the Integer result count parsed from the "Results 1 - N of M for"
# text, or 0 when every attempt returned a non-200 status or the text is
# absent. Exceptions raised by Net::HTTP are not rescued.
def twittercount(urlville, tries = 5)
  response = nil
  while tries > 0
    response = Net::HTTP.get_response(URI.parse(urlville))
    break if response.code == "200"
    # Consume one attempt; without this a persistent failure loops forever.
    tries -= 1
  end

  if response.nil? || response.code != "200"
    $stderr << "\n-------------erreur sur %s\n" % urlville
    # Report the last HTTP status instead of the response object's to_s.
    $stderr << "#{response.code} #{response.message}\n" if response
    return 0
  end

  # String#[] with a capture index yields nil on no match; nil.to_i is 0.
  response.body[/Results 1 - [0-9]* of ([0-9]*) for/, 1].to_i
end

# Two requests per city, hence twice as many progress steps as cities.
barre = ProgressBar.new("villes twitter", villes.size * 2)

villes.each do |ville|
  ville.strip!
  urlville = urlsearch + CGI.escape(ville)
  nbinscrits = twittercount(urlville)
  barre.inc(1)

  # Same query narrowed with "+france".
  urlville += "+france"
  nbdup = twittercount(urlville)
  barre.inc(1)

  # The narrowed query should never return more results than the broad one.
  if nbdup > nbinscrits
    $stderr << "incohérence sur %s\n" % ville
  end

  tabvilles << {'ville'=>ville, 'nbinscrits'=>nbinscrits, 'doublons'=>nbdup}
end

nb_tot = 0
nb_doublons = 0
# Print cities from the largest count to the smallest while accumulating totals.
tabvilles.sort_by { |statsville| statsville['nbinscrits'] }.reverse_each do |statsville|
  puts "%s:%d:%d" % [statsville['ville'], statsville['nbinscrits'], statsville['doublons']]
  nb_tot += statsville['nbinscrits']
  nb_doublons += statsville['doublons']
end

nb_france = twittercount(urlsearch + "France")
# Subtract the "+france" counts so they are not counted in both terms.
grand_total = nb_france + nb_tot - nb_doublons

puts
puts "Total : %d" % nb_tot
puts "Doublons france : %d" % nb_doublons
puts "France : %d" % nb_france
puts "Grand total : %d" % grand_total
puts "Estimation : %d" % (grand_total * 2)