#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print a slice of character pairs from two whitespace-normalized texts.

Reads ``brazilbiz-en.txt`` and ``brazilbiz-pt.txt``, collapses every run of
whitespace in each to a single space, pairs the two texts character by
character, and prints pairs 2000 through 2199.
"""
import io
import re

# Matches one or more consecutive whitespace characters (Unicode-aware).
whitespaceRE = re.compile(r"\s+", re.UNICODE)


def squeeze(text):
    """Return *text* stripped at both ends with whitespace runs collapsed.

    Args:
        text: The string to normalize.

    Returns:
        A copy of ``text`` without leading or trailing whitespace and with
        every internal run of whitespace replaced by one ASCII space.
    """
    return whitespaceRE.sub(' ', text.strip())


def _read_text(path, encoding='utf-8'):
    """Return the decoded contents of the file at *path*.

    The handle is closed as soon as the read finishes.  ``io.open`` is used
    because it accepts ``encoding`` on both Python 2 and Python 3.
    """
    # NOTE(review): UTF-8 is assumed for the data files -- confirm.  It is a
    # superset of ASCII, which is all the previous implicit decoding accepted.
    with io.open(path, encoding=encoding) as handle:
        return handle.read()


def main():
    """Load both texts, normalize them and print character pairs 2000-2199."""
    en = squeeze(_read_text('brazilbiz-en.txt'))
    pt = squeeze(_read_text('brazilbiz-pt.txt'))
    # zip() over two strings pairs them character by character and stops at
    # the shorter one; list() keeps slicing valid where zip() is lazy.
    ep = list(zip(en, pt))
    print(ep[2000:2200])


if __name__ == '__main__':
    main()