#!/usr/bin/env python
# coding: utf-8
"""Print entries of ``id2en.txt`` whose two columns hold the same two words.

Each line of ``id2en.txt`` is expected to contain two tab-separated fields.
Both fields are tokenized; when each side yields exactly two tokens and the
two token sets are identical (two distinct words, in any order), the entry
is printed as ``<second field> <first field>``.
"""
import sys
sys.path.append('/home/user')
import codecs

# NOTE(review): `tokenize` is neither defined nor imported in this file, so
# running the script raises NameError at the first call. Presumably it lives
# in a module under /home/user (hence the sys.path tweak above) -- confirm
# and add the matching import.


def load_pairs(path):
    """Return the (first field, second field) pairs stored in *path*.

    The file is decoded as UTF-8 and split on line boundaries; every
    non-empty line must contain exactly two tab-separated fields.  When a
    first field occurs more than once, only its last occurrence is kept
    (the pairs are collected through a dict).

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a non-empty line does not have exactly two fields.
    """
    # The context manager closes the handle even if decoding fails.
    with codecs.open(path, encoding='utf-8') as handle:
        lines = handle.read().splitlines()
    # Empty lines carry no data; skipping them avoids a ValueError from dict().
    return list(dict([line.split('\t') for line in lines if line]).items())


def is_shared_bigram(ewords, iwords):
    """Return True when both token lists are the same two distinct words.

    Both lists must have length 2 and their intersection must contain two
    elements, i.e. the same pair of different tokens, in any order.
    """
    if len(ewords) != 2 or len(iwords) != 2:
        return False
    return len(set(ewords).intersection(iwords)) == 2


def main():
    """Scan ``id2en.txt`` and print every entry made of the same two words."""
    for first, second in load_pairs('id2en.txt'):
        if is_shared_bigram(tokenize(first), tokenize(second)):
            # Same output as the original ``print i, e``: second field,
            # a single space, then the first field.
            print("%s %s" % (second, first))


if __name__ == '__main__':
    main()