"""Build a word -> positions index for the text in ``seneca.txt``.

The script reads the file, splits it on whitespace, records every position
at which each distinct word occurs, derives a list of (occurrence count,
word) pairs sorted in ascending order, and prints the position index.
"""
from collections import defaultdict

# ``urlopen`` lives in ``urllib.request`` on Python 3 and in ``urllib`` on
# Python 2; try the newer location first so the script imports on both.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen

# Disabled: would download a remote text into ``ilo``. Nothing below reads
# ``ilo``; the local file is the only input actually used.
#ilo = urlopen('http://www.loaddoc.com/files/db/GaKoHvnpVkxI26756/GaKoHvnpVkxI26756.txt').read()

# Read the whole text; the ``with`` block closes the handle deterministically
# instead of leaving it to the garbage collector.
with open('seneca.txt') as latin_file:
    latin = latin_file.read()

# Whitespace-separated tokens, in document order.
words = latin.split()

# Distinct tokens (not used further in this part of the script).
wl = set(words)

# Map each word to the list of indices in ``words`` where it occurs.
d = defaultdict(list)
for i, w in enumerate(words):
    d[w].append(i)

# (occurrence count, word) pairs, sorted ascending by count and then by word.
freqs = sorted((len(positions), w) for w, positions in d.items())

# Parenthesised single argument: prints the same text on Python 2 and 3.
print(d)