from math import sqrt


def ngrams(s, n):
    """Return all contiguous length-``n`` slices of ``s``, joined into strings.

    ``s`` may be a string or any sliceable sequence of strings; each window
    ``s[i:i+n]`` is concatenated with ``''.join``. When ``n`` is larger than
    ``len(s)`` the result is an empty list.

    >>> ngrams('abcde', 2)
    ['ab', 'bc', 'cd', 'de']
    """
    return [''.join(s[i:i + n]) for i in range(len(s) - n + 1)]


def freq(seq):
    """Count occurrences of each element of ``seq``.

    Returns a plain ``dict`` mapping every distinct (hashable) element to the
    number of times it appears. An empty ``seq`` yields an empty dict.
    """
    counts = {}
    for elem in seq:
        counts[elem] = counts.get(elem, 0) + 1
    return counts


def scalar(vec):
    """Return the Euclidean norm of a sparse vector.

    ``vec`` is a mapping from keys to numeric components; the result is the
    square root of the sum of the squared values (0.0 for an empty mapping).
    """
    return sqrt(sum(value * value for value in vec.values()))


def sim(v, w):
    """Return the cosine similarity of two sparse vectors.

    ``v`` and ``w`` are mappings from keys to numeric components (for example
    the output of ``freq``). The dot product is taken over the keys present in
    both mappings and divided by the product of the two norms.

    If either vector has zero norm (empty, or all components zero) the cosine
    is undefined; 0.0 is returned instead of raising ``ZeroDivisionError``.
    """
    norm_product = scalar(v) * scalar(w)
    if norm_product == 0:
        # Nothing to compare against: treat as "no similarity" rather than
        # dividing by zero.
        return 0.0
    dot = sum(v[elem] * w[elem] for elem in v if elem in w)
    return float(dot) / norm_product