# -*- coding: utf-8 -*-
"""Exploration of UDHR word lists, reconstructed from an interactive session.

The original text was a flattened interpreter history.  This version keeps
the steps that worked, in their original order, as a runnable script:

* expressions the interpreter echoed are printed explicitly;
* attempts that raised in the session (``{}.pop()``, ``d.pop()``,
  ``d.update(x)`` with a tuple, ``random.choice`` on a dict, the undefined
  ``name(b)``) and abandoned half-typed statements are left out;
* interactive introspection (``help(...)``, ``dir()``) is left out.

``hrm`` is a project-local module; the replay uses its ``words``,
``topwords``, ``udhr`` and ``freq`` attributes.  The replay also expects
``haitian.txt`` and ``../udhr/*.txt`` relative to the working directory.
"""
import io
import random
import unicodedata
from glob import glob


def dchoice(d):
    """Return one (key, value) pair from *d* without changing *d*.

    The pair is whichever one ``dict.popitem`` yields; it is not a random
    draw.  Use ``random.choice(list(d.items()))`` for a random pair.

    Raises:
        KeyError: if *d* is empty (propagated from ``dict.popitem``).
    """
    key, value = d.popitem()
    # Restore the entry by direct assignment.  ``d.update(dict(pair))`` is
    # wrong here: dict() would treat the key and the value each as a
    # two-element pair, raising ValueError or inserting bogus entries.
    d[key] = value
    return key, value


def _name_or_none(char):
    """Return the Unicode name of *char*, or None when it has no name."""
    try:
        return unicodedata.name(char)
    except ValueError:
        # unicodedata.name signals an unnamed code point with ValueError,
        # not KeyError.
        return None


def _print_letter_names(text):
    """Print each character of *text* followed by its Unicode name, if any."""
    for letter in text:
        name = _name_or_none(letter)
        if name is None:
            print(letter)
        else:
            print(u"%s %s" % (letter, name))


def main():
    """Replay the exploration; needs the corpus files and the hrm module."""
    # Imported here so that dchoice stays importable without the project
    # module being installed.
    import hrm

    print(glob('../udhr/*.txt'))

    # io.open decodes while reading on both Python 2 and 3, and the
    # context manager closes the file handle.
    with io.open('haitian.txt', encoding='utf-8') as handle:
        hat = handle.read()
    print(hat)

    # The session first tried hat.split() and then switched to hrm.words.
    print(hrm.words(hat))

    print(hrm.topwords)
    for word in hrm.topwords:
        print(word)
        _print_letter_names(word)

    print(hrm.udhr)
    print(list(hrm.udhr.keys()))
    for doc in hrm.udhr.values():
        if 'uuny' in doc.words:
            print(doc.language)

    # random.choice needs a sequence, so the mapping's items are listed
    # first.
    print(random.choice(list(hrm.udhr.items())))

    d = dict(zip('abc', 'ABC'))
    print(d)
    print(d.popitem())
    print(d)

    d = dict(zip('abc', 'ABC'))
    print(d)
    print(dchoice(d))
    print(random.choice(list(d.items())))
    print(d)

    _print_letter_names(u"mó̱makat")

    # Letter frequencies over the top words, least frequent first.
    letters = list(''.join(hrm.topwords))
    counts = hrm.freq(letters)
    for count, letter in sorted((v, k) for k, v in counts.items()):
        name = _name_or_none(letter)
        if name is None:
            print(u"%s %s" % (count, letter))
        else:
            print(u"%s %s %s" % (count, letter, name))

    # General category of a few punctuation and symbol characters.
    for char in (u'.', u')', u'?', u'-', u'→'):
        print(u"%s %s" % (char, unicodedata.category(char)))


if __name__ == "__main__":
    main()