#!/usr/bin/env python
# coding: utf-8
"""Build a two-language word list by pivoting through ``cy_lex.txt``.

Usage::

    script.py ALPHA BETA

``cy_lex.txt`` is read from the current directory as UTF-8. Only lines made
of exactly three tab-separated fields are used: lines starting with ALPHA
and lines starting with BETA are each turned into a mapping from the second
field to the third field. Wherever both mappings share a second-field key,
the two third-field values are written as one ``alpha<TAB>beta`` line to
``ALPHA2BETA.txt``.

NOTE(review): the file name suggests the shared key is a Welsh ("cy")
headword and the first field a language code -- confirm against the data.
"""
import codecs
import sys

LEXICON_PATH = 'cy_lex.txt'


def lexify(lines):
    """Return a dict mapping field 2 to field 3 of each well-formed line.

    A line is well-formed when splitting it on tabs yields exactly three
    fields; every other line is silently skipped. When the same key occurs
    more than once, the last occurrence wins.
    """
    lex = {}
    for line in lines:
        bits = line.split('\t')
        if len(bits) == 3:
            lex[bits[1]] = bits[2]
    return lex


def _pair_entries(first, second):
    """Return ``(first[k], second[k])`` for every key ``k`` present in both."""
    return [(first[key], second[key]) for key in first if key in second]


def dump(lex, filename=None):
    """Write the ``(left, right)`` pairs in *lex* as tab-separated lines.

    Pairs whose two sides are identical, or where either side contains a
    colon, are left out.

    lex      -- iterable of 2-tuples of text.
    filename -- output path; when omitted it is derived from the first two
                command-line arguments as ``"<arg1>2<arg2>.txt"``, which is
                what the script has always produced.
    """
    if filename is None:
        filename = "%s2%s.txt" % (sys.argv[1], sys.argv[2])
    # The context manager closes the file even if a write fails part-way.
    with codecs.open(filename, mode='w', encoding="utf-8") as out:
        for left, right in lex:
            if left != right and ':' not in left and ':' not in right:
                out.write("%s\t%s\n" % (left, right))


def main(argv=None):
    """Run the script; return a process exit status (``None`` on success).

    argv -- full argument vector including the program name; defaults to
            ``sys.argv``. Arguments beyond the first two are ignored.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        sys.stderr.write("usage: %s ALPHA BETA\n" % argv[0])
        return 2
    alpha, beta = argv[1], argv[2]

    # Decode while reading: works on Python 2 and 3 alike and releases the
    # file handle as soon as the contents are in memory.
    with codecs.open(LEXICON_PATH, encoding='utf-8') as source:
        lines = source.read().splitlines()

    alpha_lex = lexify(line for line in lines if line.startswith(alpha))
    beta_lex = lexify(line for line in lines if line.startswith(beta))

    dump(_pair_entries(alpha_lex, beta_lex), "%s2%s.txt" % (alpha, beta))
    return None


if __name__ == '__main__':
    sys.exit(main())