#!/usr/bin/env python
# -*- coding: utf-8 -*-
import io
import re

# Load both corpora as unicode text.  The files are decoded explicitly
# rather than through unicode(bytes), which uses the ASCII codec and fails
# on the first accented character.  UTF-8 is assumed here because it
# matches this script's own coding declaration -- TODO confirm against the
# actual data files.  The `with` blocks make sure the handles are closed.
with io.open('brazilbiz-en.txt', encoding='utf-8') as en_file:
    en = en_file.read()
with io.open('brazilbiz-pt.txt', encoding='utf-8') as pt_file:
    pt = pt_file.read()

# Matches one or more consecutive whitespace characters (Unicode-aware).
# Raw string so the backslash reaches the regex engine untouched.
whitespaceRE = re.compile(r"\s+", re.UNICODE)

def squeeze(text):
    """Return *text* with every run of whitespace collapsed to one space.

    Leading and trailing whitespace is removed entirely; an empty or
    whitespace-only input yields an empty string.  Intended for unicode
    text, where split() recognises the same whitespace characters as the
    Unicode-aware ``\\s`` regex class.
    """
    # split() with no separator discards leading/trailing whitespace and
    # treats any run of whitespace as a single delimiter, so joining the
    # pieces with one space is the whole normalisation.
    return ' '.join(text.split())

# Normalise whitespace in both texts, then tokenise them on whitespace.
en = squeeze(en)
pt = squeeze(pt)

enwords = en.split()
ptwords = pt.split()

# (position, word) pairs for every token of each text.
ptdex = list(enumerate(ptwords))
endex = list(enumerate(enwords))

ptlen = len(ptdex)
enlen = len(endex)

# Print how many times each word of a translation pair occurs in its text.
# list.count gives the same number as filtering the index lists, without
# building a temporary list.  Swap in the commented pair to check another
# word.
#print(enwords.count(u'business'))
#print(ptwords.count(u'negócios'))
print(enwords.count(u'commerce'))
print(ptwords.count(u'comércio'))
