# NOTE(review): this span reads like an interactive-session history whose
# statements had been collapsed onto two physical lines.  It is laid out here
# one statement per line, in the original order.  Bare expressions are kept;
# when run as a script their values are simply discarded.  `before`, `d`,
# bare `rules` and (at its first use) `Lexicon` are not defined anywhere in
# this span -- presumably they come from earlier in the file; confirm before
# running this top to bottom.

# --- Skeleton file: rewrite html.basic into ~/.skel/before (three passes) ---
# NOTE(review): replace('', '') swaps the empty string for itself, so it is a
# no-op as written; the real arguments were presumably lost -- confirm.
# The original chained `.write(before).close()`; write() does not return the
# file object, so `.close()` raised AttributeError.  `with` closes the file.
before = before.replace('','').replace('','')
with open('/home/pat/.skel/before','w') as out:
    out.write(before)

with open('/home/pat/.skel/html.basic') as src:
    before = src.read()
before = before.replace('','').replace('','')
with open('/home/pat/.skel/before','w') as out:
    out.write(before)

with open('/home/pat/.skel/html.basic') as src:
    before = src.read()
before = before.replace('','').replace('','')
with open('/home/pat/.skel/before','w') as out:
    out.write(before)

# --- Pairing an English word with its Greek counterpart ---
enel = u"Methanol Μεθανόλη".split()
en, el = u"Methanol Μεθανόλη".split()
en
el
zip(en, el)

import translit
# NOTE(review): `Lexicon` is only imported further down in this span; unless
# it is already in scope from earlier in the file this line raises NameError.
en2el = Lexicon('corpora/en2el.txt')
en2el = translit.Lexicon('corpora/en2el.txt')
en2el.perfects
# NOTE(review): on Python 2 this list comprehension rebinds the module-level
# `en` / `el` that normality(en, el) reads below -- confirm that is intended.
'Methanol' in [en for en, el in en2el.perfects]
en2el.rules
en2el.rule_model


def score(word):
    """Placeholder; redefined with two parameters further down."""
    pass


def score(left, right):
    """Pair the items of `left` and `right` position by position.

    The pairs are built but not used or returned.
    """
    rules = zip(left, right)


def normality(left, right):
    """Return the sum of en2el.rules[pair] over zip(left, right).

    Raises whatever en2el.rules raises for a pair it does not contain.
    """
    # Accumulate without a local called `score`, which hid the function above.
    return sum(en2el.rules[rule] for rule in zip(left, right))


normality(en, el)

# --- Unicode names of the letters in a Greek-script word ---
from unicodedata import name
for c in u"ΝΑΤΟ":
    # One formatted argument prints the same text as `print c, name(c)` and
    # parses on both Python 2 and Python 3.
    print('%s %s' % (c, name(c)))

# --- Inspecting the lexicon's right-hand side and alphabets ---
from translit import Lexicon
en2el = Lexicon('corpora/en2el.txt')
en2el.perfects
[right for left, right in en2el.perfects]
' '.join([right for left, right in en2el.perfects])
set(' '.join([right for left, right in en2el.perfects]))
sorted(set(' '.join([right for left, right in en2el.perfects])))
print(' '.join(sorted(set(' '.join([right for left, right in en2el.perfects])))))
en2el = Lexicon('corpora/en2el.txt')
from translit import Lexicon
en2el = Lexicon('corpora/en2el.txt')
en2el.left_alphabet
print(en2el.left_alphabet)
print(en2el.right_alphabet)
len(en2el.right_alphabet)
len(en2el.right_alphabet) / 2
from translit import Lexicon
en2el = Lexicon('corpora/en2el.txt')
print(en2el.right_alphabet)
from translit import Lexicon
en2el = Lexicon('corpora/en2el.txt')
print(en2el.right_alphabet)
print(en2el.left_alphabet)
en2el.perfects

# --- Per-character rule counts for word pairs looked up in `d` ---
# NOTE(review): `d` is not defined in this span; it is indexed by English
# words here, so presumably it maps them to their counterparts -- confirm.
d['Reason']
'Reason', d['Reason']
zip('Reason', d['Reason'])
for e, g in zip('Reason', d['Reason']):
    print('%s %s' % (e, g))
for e, g in zip('Reason', d['Reason']):
    print('%s %s' % (e, g))
for e, g in zip('Reason', d['Reason']):
    print('%s %s %s' % (e, g, en2el.rules.count((e, g))))


def score(left, right):
    """Print each position-wise pair of left/right with its rule count.

    For every pair (e, g) from zip(left, right), prints e, g and
    en2el.rules.count((e, g)) on one line.  Returns None.
    """
    for e, g in zip(left, right):
        print('%s %s %s' % (e, g, en2el.rules.count((e, g))))


d
d['Reason']
score('Reason', d['Reason'])
score('Methanol', d['Methanol'])
score('Haircut', d['Haircut'])


def both(w):
    """Return the tuple (w, d[w])."""
    return (w, d[w])


# The original passed the tuple as a single argument, which raises TypeError
# against the two-parameter score(); unpack it instead.
score(*both('Mnemosyne'))
(both('Mnemosyne'))
both('Mnemosyne')
score('Mnemosyne', d['Mnemosyne'])
score('Mons', d['Mons'])
score('Sofia', d['Sofia'])

# --- Which rules and lexicon entries involve 'f' / 'φ' ---
# NOTE(review): bare `rules` is not defined at module level in this span; the
# next line repeats the query on en2el.rules -- confirm which one is wanted.
[x for x in rules if 'f' in x]
[x for x in en2el.rules if 'f' in x]
for a, b in [x for x in en2el.rules if 'f' in x]:
    print('%s %s' % (a, b))
for a, b in [x for x in en2el.rules if 'φ' in x]:
    print('%s %s' % (a, b))
for a, b in [x for x in en2el.rules if u'φ' in x]:
    print('%s %s' % (a, b))
en2el.left
en2el.right.count(u'φ')
for en, el in en2el.lexicon:
    if u'φ' in el:
        print('%s %s' % (en, el))
for en, el in en2el.lexicon:
    if u'φ' in el and 'f' not in en:
        print('%s %s' % (en, el))
for en, el in en2el.lexicon:
    if u'φ' in el and 'f' not in en and en2el.same_pattern(en, el):
        print('%s %s' % (en, el))
for en, el in en2el.lexicon:
    if u'φ' in el and 'f' not in en:
        print('%s %s' % (en, el))
for en, el in en2el.lexicon:
    if u'φ' in el and 'ph' in en:
        print('%s %s' % (en, el))
d['Alphabet']