"""Toy substitution cipher plus unigram/bigram frequency model of a text."""

from random import shuffle
from string import ascii_lowercase as letters
import sys
from collections import defaultdict


def encrypt(text, mapping):
    """Return *text* encrypted with the substitution table *mapping*.

    *mapping* maps lowercase letters to their replacements.  Spaces and
    commas pass through unchanged.  Upper-case letters are substituted via
    their lowercase form and keep their case.  Any other character that has
    no entry in the table is dropped from the output.

    The caller's *mapping* is not modified.
    """
    # Work on a copy so the pass-through entries do not leak into the
    # caller's dictionary.
    table = dict(mapping)
    table[' '] = ' '
    table[','] = ','
    # Membership is tested on the lowercase form: the table's keys are
    # lowercase, so testing the raw character would discard every capital.
    return ''.join(
        encode(letter, table) for letter in text if letter.lower() in table
    )


def encode(letter, mapping):
    """Return the substitution for a single character.

    Upper-case characters are looked up by their lowercase form and the
    result is upper-cased; lower-case characters are looked up directly;
    anything else is returned unchanged.

    Raises KeyError if a cased character has no entry in *mapping*.
    """
    if letter.isupper():
        return mapping[letter.lower()].upper()
    if letter.islower():
        return mapping[letter]
    return letter


def freq(seq):
    """Return a ``defaultdict(int)`` counting occurrences of each item in *seq*."""
    counts = defaultdict(int)
    for item in seq:
        counts[item] += 1
    return counts


def ngrams(text, n):
    """Return every length-*n* slice of *text*, in order of position.

    The result is empty when *text* is shorter than *n*.
    """
    return [text[i:i + n] for i in range(len(text) - n + 1)]


def unigrams(text):
    """Return the list of single-element slices of *text*."""
    return ngrams(text, 1)


def bigrams(text):
    """Return the list of overlapping two-element slices of *text*."""
    return ngrams(text, 2)


class Model:
    """Frequency statistics and a randomly keyed encryption of a text.

    Attributes set by the constructor:
        text           the original text
        unigrams       list of 1-grams of the text
        bigrams        list of 2-grams of the text
        unigram_model  list of (count, symbol) pairs ordered by symbol
        bigram_model   defaultdict mapping each bigram to its count
        mapping        random letter-to-letter substitution table
        encrypted      the text encrypted with ``mapping``
    """

    def __init__(self, text):
        self.text = text
        self.unigrams = unigrams(self.text)
        self.bigrams = bigrams(self.text)
        # Pairs are (count, symbol); the ordering comes from sorting the
        # (symbol, count) items, i.e. it is by symbol, not by count.
        self.unigram_model = [
            (count, symbol)
            for symbol, count in sorted(freq(self.unigrams).items())
        ]
        # Reuse the bigram list computed above instead of rebuilding it.
        self.bigram_model = freq(self.bigrams)
        self.mapping = self.genmap(letters)
        self.encrypted = encrypt(self.text, self.mapping)

    def genmap(self, original):
        """Return a dict mapping each element of *original* to a random
        element of *original*, forming a permutation."""
        alpha = list(original)
        randomized = alpha[:]
        shuffle(randomized)
        return dict(zip(alpha, randomized))


if __name__ == "__main__":
    # Close the file deterministically instead of relying on garbage
    # collection.
    with open('neuromancer.txt') as source:
        model = Model(source.read())
    # Single-argument print(...) and %-formatting behave the same on
    # Python 2 and Python 3, so the output format is unchanged.
    print(model.encrypted[:1000])
    for count, symbol in model.unigram_model:
        print("%s %s" % (count, symbol))