require 'net/http'
require 'cgi'
require 'rubygems'
require 'progressbar'

# Prefix of every search URL; "%3A" is the URL-encoded ":" that follows
# the "location" keyword, so the city name is appended directly after it.
urlsearch = 'http://twitter.com/tw/search/users?q=location%3A'

# One array entry per line of the input file (line endings are still attached
# at this point).
villes = File.readlines('villesfrance.txt')
# Small hard-coded sample that can replace the file contents:
#villes = ['bordeaux', 'paris', 'nantes']

# Receives one hash of statistics per city.
tabvilles = []

# Truncate twitterFrance.err so that it contains a single empty line.
File.open("twitterFrance.err", "w") { |errfile| errfile.puts("") }

# Returns the number of users announced by the search page at +urlville+.
#
# The page is requested at most 5 times, stopping at the first answer whose
# HTTP status code is "200". The total is then read from the
# "Results 1 - N of TOTAL for" sentence found in the response body.
#
# urlville - String URL of the search page to fetch.
#
# Returns the total as an Integer. Returns 0 when none of the attempts got a
# "200" answer (a diagnostic is then written to $stderr) or when the body
# does not contain the expected sentence.
# Exceptions raised by URI.parse or Net::HTTP are not rescued here.
def twittercount(urlville)
  uri = URI.parse(urlville)
  response = nil

  # Bounded retry: without a limit a server that keeps answering with a
  # non-200 status would make this method spin forever.
  5.times do
    response = Net::HTTP.get_response(uri)
    break if response.code == "200"
  end

  if response.code != "200"
    $stderr << "\n-------------erreur sur %s\n" % urlville
    $stderr << response.inspect << "\n"
    return 0
  end

  # Use the MatchData explicitly instead of the implicit $1 global.
  found = /Results 1 - [0-9]* of ([0-9]*) for/.match(response.body)
  found ? found[1].to_i : 0
end


# Two requests are issued per city, hence twice as many progress steps as
# there are cities.
barre = ProgressBar.new("villes twitter", villes.size*2)

villes.each do |ville|
  ville.strip!
  recherche = urlsearch + CGI.escape(ville)

  # Users whose location matches the city name alone.
  inscrits = twittercount(recherche)
  barre.inc(1)

  # Same search with "+france" appended to the query.
  avec_france = twittercount(recherche + "+france")
  barre.inc(1)

  # The second count is later subtracted as a duplicate count, so it is
  # reported when it exceeds the first one.
  $stderr << "incohérence sur %s\n" % ville if avec_france > inscrits

  tabvilles << {'ville'=>ville, 'nbinscrits'=>inscrits, 'doublons'=>avec_france}
end

# Print one "city:count:duplicates" line per city, highest count first.
classement = tabvilles.sort_by { |stats| stats['nbinscrits'] }.reverse
classement.each do |stats|
  puts "%s:%d:%d" % [stats['ville'], stats['nbinscrits'], stats['doublons']]
end

# Totals over all cities.
nb_tot = classement.inject(0) { |somme, stats| somme + stats['nbinscrits'] }
nb_doublons = classement.inject(0) { |somme, stats| somme + stats['doublons'] }

# Users found with the location "France" itself.
nb_france = twittercount(urlsearch + "France")

# The "+france" counts are removed from the sum of the country-wide count
# and the per-city counts.
grand_total = nb_france + nb_tot - nb_doublons

puts
puts "Total : %d" % nb_tot
puts "Doublons france : %d" % nb_doublons
puts "France : %d" % nb_france
puts "Grand total : %d" % grand_total
puts "Estimation : %d" % (grand_total * 2)

