#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Report lexicon entries whose Arabic hamza-below alef became a plain alef.

Reads ``ar_lex.txt`` as UTF-8.  Each usable line has exactly three
tab-separated fields: a code, an Arabic word and a Persian word.  Only lines
starting with ``fa`` are considered.  For every entry whose Arabic word
contains ARABIC LETTER ALEF WITH HAMZA BELOW and whose Persian word contains
ARABIC LETTER ALEF, the Persian word, the Arabic word and an empty separator
line are printed.

Runs unchanged on Python 2 and Python 3.
"""

import io

LEXICON_PATH = 'ar_lex.txt'
ALEF = u"\N{ARABIC LETTER ALEF}"
ALEF_HAMZA_BELOW = u"\N{ARABIC LETTER ALEF WITH HAMZA BELOW}"


def find_alef_pairs(lines):
    """Yield ``(persianword, arabicword)`` for each matching lexicon line.

    Args:
        lines: iterable of already-decoded (unicode) lexicon lines, with or
            without their trailing line terminator.

    Yields:
        Tuples ``(persianword, arabicword)`` in input order, for lines that
        start with ``fa``, have exactly three tab-separated fields, contain
        ALEF WITH HAMZA BELOW in the Arabic word and ALEF in the Persian word.
        Lines that do not fit this shape are skipped silently.
    """
    for line in lines:
        # NOTE(review): 'fa' is presumably a Persian language tag at the start
        # of the code field -- confirm against the lexicon format.
        if not line.startswith(u'fa'):
            continue
        # Drop the line terminator first; otherwise it stays glued to the
        # last field and produces a stray blank line when printed.
        fields = line.rstrip(u'\r\n').split(u'\t')
        if len(fields) != 3:
            # Malformed entry: skip it explicitly instead of relying on a
            # catch-all ``except`` around the tuple unpacking.
            continue
        # The first field ('code') is just cruft in this lexicon.
        _code, arabicword, persianword = fields
        if ALEF_HAMZA_BELOW in arabicword and ALEF in persianword:
            yield persianword, arabicword


def main():
    """Scan the lexicon file and print every matching word pair."""
    # io.open decodes as UTF-8 explicitly (a bare unicode(line) would use the
    # ASCII codec and fail on Arabic text); ``with`` closes the file.
    with io.open(LEXICON_PATH, encoding='utf-8') as lexicon:
        for persianword, arabicword in find_alef_pairs(lexicon):
            # Single-argument print(...) behaves identically on Python 2
            # and Python 3.
            print(persianword)
            print(arabicword)
            print("")


if __name__ == '__main__':
    main()