#!/usr/bin/env python
# coding: utf-8
"""Print the source-side vocabulary of a tab-separated bilingual lexicon.

Usage: python <this script> LEXFILE

LEXFILE is a UTF-8 text file with one entry per line in the form
``source phrase<TAB>target phrase``.  Every whitespace-separated word that
occurs in a source phrase is written to standard output, one word per line,
in sorted order.

NOTE(review): the variable names (``id2en``) suggest an Indonesian -> English
lexicon, but nothing in the code depends on the languages involved.
"""
import codecs
import io
import sys

# Kept from the original script.  Nothing in this file imports from that
# directory, so it is preserved only as a module-level side effect.
sys.path.append('/home/user')


def parse_lexicon(lines):
    """Build a ``{source phrase: target phrase}`` dict from lexicon lines.

    Args:
        lines: iterable of text lines, with or without a trailing line
            terminator.

    Returns:
        A dict mapping each source phrase to its target phrase.  When a
        source phrase occurs more than once, the last entry wins.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two
            tab-separated fields.  The message names the offending line.
    """
    lexicon = {}
    for lineno, raw in enumerate(lines, 1):
        line = raw.rstrip('\r\n')
        # Blank lines carry no entry; skip them instead of failing.
        if not line.strip():
            continue
        fields = line.split('\t')
        if len(fields) != 2:
            raise ValueError(
                'line %d: expected 2 tab-separated fields, got %d: %r'
                % (lineno, len(fields), line))
        source, target = fields
        lexicon[source] = target
    return lexicon


def vocabulary(phrases):
    """Return the set of whitespace-separated words found in *phrases*.

    Args:
        phrases: iterable of strings.  Passing a lexicon dict collects the
            words of its keys (the source side); passing ``lexicon.values()``
            collects the target side.
    """
    words = set()
    for phrase in phrases:
        words.update(phrase.split())
    return words


def main(argv=None):
    """Run the script; return the process exit status.

    Args:
        argv: argument list shaped like ``sys.argv`` (program name first).
            Defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when the lexicon path argument is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'lexvocab'
        sys.stderr.write('usage: %s LEXFILE\n' % prog)
        return 2

    # Decode once at the I/O boundary and make sure the file gets closed.
    with io.open(argv[1], encoding='utf-8') as lexfile:
        id2en = parse_lexicon(lexfile.read().splitlines())

    # Always emit UTF-8 so that non-ASCII words do not raise
    # UnicodeEncodeError when stdout is a pipe or file.  Python 3 exposes the
    # byte stream as ``sys.stdout.buffer``; Python 2's stdout takes bytes.
    out = codecs.getwriter('utf-8')(getattr(sys.stdout, 'buffer', sys.stdout))
    for word in sorted(vocabulary(id2en)):
        out.write(word + u'\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())