#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Index the words of a text file by their relative position in the text.

Run as a script with the path of the file to index as the only argument;
one line is printed per entry of the index built by ``makeindex``.
"""

import sys

from BeautifulSoup import UnicodeDammit

from collection import Collection


def uopen(fname):
    """Read the file *fname* and return UnicodeDammit's view of its contents.

    The file is opened in ``'U'`` (universal newlines) mode, read in one go
    and handed to ``UnicodeDammit``; the ``unicode`` attribute of the result
    is returned.
    """
    # The context manager closes the handle even if read() raises.
    with open(fname, 'U') as handle:
        raw = handle.read()
    return UnicodeDammit(raw).unicode


def wordlist(text):
    """Return the whitespace-separated tokens of *text* as a list."""
    return text.split()


def makeindex(words):
    """Build a Collection mapping each word to its relative position.

    For the word at zero-based offset ``nth`` the stored value is
    ``int(round(float(nth) / len(words) * 100 + 1))``.

    NOTE(review): presumably ``Collection`` accumulates every value assigned
    to the same key (the caller names the values ``nths``) -- confirm against
    the ``collection`` module.
    NOTE(review): under Python 2 rounding the value reaches 101 for the last
    words of inputs with 200 or more words; confirm whether a 1-100 range was
    intended.
    """
    index = Collection()
    # Loop-invariant, so computed once.  With an empty *words* the loop body
    # never runs and no division by zero can occur.
    length = len(words)
    for nth, word in enumerate(words):
        interim = float(nth) / length * 100 + 1
        index[word] = int(round(interim))
    return index


def main(argv=None):
    """Index the file named by ``argv[1]`` and print every index entry.

    *argv* defaults to ``sys.argv``.  Exits with a usage message when no
    file name was supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: %s FILE" % argv[0])
    for word, nths in makeindex(wordlist(uopen(argv[1]))).items():
        # A single parenthesised argument parses the same way under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (word, nths))


if __name__ == "__main__":
    main()