#from urllib import urlopen from codecs import getwriter import sys sys.stdout = getwriter('utf-8')(sys.stdout) #ind = urlopen('http://www.unicode.org/udhr/d/udhr_ind.txt').read().decode('utf-8').split() ind = open('/home/pat/repo/udhr/udhr_ind.txt').read().decode('utf-8').split() print "Indonesian sample: " print ' '.join(ind[:100]) print #eng = urlopen('http://www.unicode.org/udhr/d/udhr_eng.txt').read().decode('utf-8').split() eng = open('/home/pat/repo/udhr/udhr_eng.txt').read().decode('utf-8').split() print "English sample: " print ' '.join(eng[:100]) print print 'apa' in ' '.join(ind).lower() print 'kabar' in ' '.join(ind).lower()