#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Load ``udhr_<code>.txt`` text files and collect per-language word data.

When run as a script, the ``udhr_por.txt`` and ``udhr_eng.txt`` files are
read from the current working directory. Importing the module only defines
the helper functions and performs no I/O.
"""


def ler(code):
    """Read ``udhr_<code>.txt`` and return its contents as Unicode text.

    Args:
        code: Suffix that selects the file, e.g. ``'por'`` or ``'eng'``.

    Returns:
        The value of ``UnicodeDammit(raw_bytes).unicode`` for the file.

    Raises:
        IOError: If the file cannot be opened or read.
    """
    # Imported lazily so the pure helpers in this file (e.g. ``average``)
    # remain importable when BeautifulSoup is not installed.
    from BeautifulSoup import UnicodeDammit

    # Read raw bytes and let UnicodeDammit pick the encoding; the ``with``
    # block guarantees the file handle is closed.
    with open('udhr_' + code + '.txt', 'rb') as handle:
        raw = handle.read()
    return UnicodeDammit(raw).unicode


def average(series):
    """Return the arithmetic mean of the numbers in *series* as a float.

    Args:
        series: Any iterable of numbers (list, tuple, generator, ...).

    Returns:
        The mean of the values, always as a ``float``.

    Raises:
        ValueError: If *series* contains no values.
    """
    # Materialise once so iterators/generators can be both summed and counted.
    values = list(series)
    if not values:
        raise ValueError('average() requires at least one value')
    # float() keeps the division exact under Python 2 integer semantics.
    return float(sum(values)) / len(values)


if __name__ == '__main__':
    por = ler('por')
    eng = ler('eng')

    # Key the word lists by language code; the original used the whole
    # document text as the dictionary key.
    words = {}
    words['por'] = por.split()

    # Length of every Portuguese word, kept under a name so it can be
    # inspected or passed to average().
    por_word_lengths = [len(w) for w in words['por']]