import shutil shutil.os.listdir('../udhr/') shutil.os.listdir('../udhr/*.txt') from glob import glob glob('../udhr/*.txt') def differences(series): return [abs(y-x) for x,y in bigrams(series)] def bigrams(series): return ngrams(series,2) def ngrams(series): return [series[i:i+n] for i in range(len(series)-n+1)] import random random.normalvariate random.normalvariate() random.normalvariate(0,10,1) random.normalvariate(0,10) dir(random) random.uniform() random.uniform(0,10) help(random.uniform) [random.uniform(10) for i in range(10)] [random.uniform(0,10) for i in range(10)] [math.floor(random.uniform(0,10)) for i in range(10)] import math [math.floor(random.uniform(0,10)) for i in range(10)] [int(random.uniform(0,10)) for i in range(10)] dir() import hrm hrm.docs['huu'] hrm.udhr['huu'] hrm.udhr['huu'].topword print hrm.udhr['huu'].topword print hrm.udhr['huu'].byfreq print hrm.udhr['huu'].wordsbyfreq for a,b in sorted( hrm.udhr['huu'].wordsbyfreq): print a.b for a,b in sorted( hrm.udhr['huu'].wordsbyfreq): print a,b for a,b in sorted( hrm.udhr['abc'].wordsbyfreq): print a,b for a,b in sorted( hrm.udhr['abb'].wordsbyfreq): print a,b for a,b in sorted( hrm.udhr['aob'].wordsbyfreq): print a,b for a,b in sorted( hrm.udhr['boa'].wordsbyfreq): print a,b dir( hrm.udhr['boa']) def span(w,f): return pass def partition(series): pass hat hat = open('haitian.txt').read() hat = unicode(hat) hat = hat.decode('utf-8') print hat hrm.words(hat) hatwords = hrm.words(hat) hatwords = hrm.words(hrm.depunc(hat)) hatwords freq(hatwords) hrm.freq(hatwords) hatfq = hrm.freq(hatwords) html = u'' for w in hatwords: pass spans = [] def span(w,f): pass fqs = [fq for w,fq in hatfq.items()] fqs max(fqs) m = max(fqs) for w,f in hatfqs.items(): w,f for w,f in hatfq.items(): w,f for w,f in hatfq.items(): w, f/m*100 for w,f in hatfq.items(): w, f/float(m)*100 [( w, f/float(m)*100) ] [( w, f/float(m)*100) for w,f in hatfq.items()] dict([( w, f/float(m)*100) for w,f in hatfq.items()]).values() 
# Continuation of an interactive Python 2 session, one statement per line in
# the order entered.  Bare expressions echo a value at the prompt but have no
# effect when run as a script.  Attempts that raise are kept as comments next
# to the retry that replaced them.  `hatfq`, `m` and `hatwords` are bound
# earlier in the session.
import random

# --- range of the scaled frequencies ---
max(dict([(w, f / float(m) * 100) for w, f in hatfq.items()]).values())
min(dict([(w, f / float(m) * 100) for w, f in hatfq.items()]).values())

# ' '.join(sorted([random.choice(words) for i in range(50)]))
#   ^ NOTE(review): entered twice, on either side of an `import random`;
#     `words` is not defined anywhere in the visible transcript, so the
#     expression is left disabled -- confirm where `words` should come from.

# --- 100 random integers in 0..147, sorted and written to 'uh' ---
random.uniform(0, 10)
random.uniform(0, 10) * 10
int(random.uniform(0, 10) * 10)
# random.randint(10)  # TypeError: randint() needs both a and b
random.randint(0, 10)
random.randint(0, 147)
[random.randint(0, 147) for i in range(100)]
nums = [random.randint(0, 147) for i in range(100)]
sorted(nums)
snums = sorted(nums)
snums
# open('uh', 'w').write(snums)            # TypeError: write() needs a string
# open('uh', 'w').write(' '.join(snums))  # TypeError: join() needs strings
# The `with` block closes the file, so the text is flushed to disk
# deterministically instead of whenever the handle is garbage-collected.
with open('uh', 'w') as out:
    out.write(' '.join([str(i) for i in snums]))

# --- lengths of the first 100 items of hatwords, written to 'oe' ---
hatwords
[len(w) for w in hatwords]
[len(w) for w in hatwords][:100]
snums2 = [len(w) for w in hatwords][:100]
with open('oe', 'w') as out:  # original order; overwritten just below
    out.write(' '.join([str(i) for i in snums2]))
snums2 = sorted(snums2)
with open('oe', 'w') as out:  # final content: the sorted lengths
    out.write(' '.join([str(i) for i in snums2]))