#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
"""
Given a file with a name like:

    ENGLISH

Which contains the appropriate text from this Wikipedia page (see the
bottom of this file for sample file content):

    http://en.wikipedia.org/wiki/Scrabble_letter_distributions

For the corresponding language, generate code containing a Python
dictionary representing the Scrabble "language model" of the language.

(To compile several files into a single models.py, see buildmodels.sh)
"""
import fileinput
import re
import sys

# Letter -> point value; filled in by main() when run as a script.
scores = {}

# First run of digits in the text before the colon is the point value.
# Compiled once here rather than on every input line.
valRE = re.compile('[0-9]+')


def totext(line):
    """
    Return `line` as text, decoding UTF-8 when it arrives as bytes.

    Python 2 hands out byte strings, and the implicit ASCII decode done by
    unicode(line) fails on the non-ASCII multiplication sign used in the
    input. Text that is already decoded is returned unchanged.
    """
    if isinstance(line, bytes):
        return line.decode('utf-8')
    return line


def extracttiles(seq):
    """
    Return the tile names found in a "LETTER ×COUNT, ..." listing.

    The listing is split on whitespace and every token containing the
    multiplication sign (the counts) is dropped; what remains is returned
    in input order.

    >>> extracttiles(u'B ×2, C ×2, M ×2, P ×2') == [u'B', u'C', u'M', u'P']
    True
    """
    timessymbol = u'\xd7'  # the multiplication sign
    return [part for part in seq.split() if timessymbol not in part]


def parsescores(lines):
    """
    Build a {letter: points} dictionary from an iterable of input lines.

    Only lines that contain 'point' and do not contain 'blank' are used.
    The point value is the first number before the first colon and the
    tiles are read from the text after it. Lines that pass the filter but
    have no colon or no number are skipped instead of raising.
    """
    found = {}
    for line in lines:
        line = totext(line)
        if 'point' not in line or 'blank' in line:
            continue
        # partition() tolerates additional colons in the tile listing.
        head, colon, tail = line.partition(':')
        match = valRE.search(head)
        if not colon or match is None:
            continue
        val = int(match.group())
        for letter in extracttiles(tail):
            found[letter] = val
    return found


def formatmodel(lgname, model):
    """
    Return the source text of a dictionary literal assigned to `lgname`.

    Entries are emitted sorted by letter so the generated code does not
    depend on dictionary iteration order.
    """
    rows = ["\tu'%s': %s, " % (letter, value)
            for letter, value in sorted(model.items())]
    return '\n'.join(['\n%s = {' % lgname] + rows + ['}'])


def main(argv=None):
    """
    Read the input named on the command line and print the model code.

    argv[2] is used as the name of the generated dictionary. Returns 0 on
    success, or 2 after writing a usage message to stderr when fewer than
    two arguments are given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.stderr.write('usage: %s FILE LGNAME\n' % argv[0])
        return 2
    lgname = argv[2]
    # NOTE(review): as in the original fileinput.input() call, every
    # command-line argument (including LGNAME) is read as an input file.
    # Confirm against buildmodels.sh before narrowing this to argv[1].
    scores.update(parsescores(fileinput.input(argv[1:])))
    print(formatmodel(lgname, scores))
    return 0


if __name__ == '__main__':
    sys.exit(main())

"""
# 2 blanks (zero points)
# 1 point: E ×12, A ×9, I ×9, O ×8, R ×6, N ×6, T ×6, L ×4, S ×4 U ×4
# 2 points: D ×4, G ×3
# 3 points: B ×2, C ×2, M ×2, P ×2
# 4 points: F ×2, H ×2, V ×2, W ×2, Y ×2
# 5 points: K ×1
# 8 points: J ×1, X ×1
# 10 points: Q ×1, Z ×1
"""