#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# frozen_string_literal: true

# Counts character-to-character correspondences between the two columns of a
# tab-separated word list.
#
# Each input line is expected to hold "source<TAB>target". For every entry
# whose two words share the same repetition pattern (see #patternize), the
# words are aligned character by character and each (source_char, target_char)
# pair is counted. The resulting Hash of counts is printed to standard output.
#
# Usage: ruby <this script> [CORPUS_PATH]
# CORPUS_PATH defaults to DEFAULT_CORPUS when omitted.

# Corpus read when no path is given on the command line.
DEFAULT_CORPUS = "/home/pat/repo/translit/corpora/en2ru.txt"

# Describes the repetition structure of a word.
#
# @param word [String] the word to analyse
# @return [Array<Integer>] for each character, the index of that character's
#   first occurrence in +word+ (e.g. "abca" => [0, 1, 2, 0])
def patternize(word)
  # Remember where each character was first seen so the scan stays linear
  # instead of re-searching the word for every character.
  first_seen = {}
  word.each_char.with_index.map { |char, index| first_seen[char] ||= index }
end

# Reads the tab-separated corpus into a source => target Hash.
#
# Only the first two fields of a line are used, both stripped of surrounding
# whitespace. Lines without a tab (including blank lines) are skipped rather
# than aborting the run. A source that appears more than once keeps its last
# target.
#
# @param path [String] path to the corpus file, read as UTF-8
# @return [Hash{String => String}]
# @raise [SystemCallError] if the file cannot be opened or read
def load_lexicon(path)
  # File.foreach with a block closes the file itself and, unlike Kernel#open,
  # never treats the path as a command to run.
  lexicon = {}
  File.foreach(path, encoding: "UTF-8") do |line|
    source, target = line.split("\t").map(&:strip)
    next unless source && target

    lexicon[source] = target
  end
  lexicon
end

# Collects aligned character pairs from every entry whose source and target
# have identical repetition patterns.
#
# @param lexicon [Hash{String => String}] source => target entries
# @return [Array<Array(String, String)>] [source_char, target_char] pairs, in
#   lexicon order
def aligned_pairs(lexicon)
  lexicon.flat_map do |source, target|
    # Equal patterns imply equal length, so zip never pads with nil.
    next [] unless patternize(source) == patternize(target)

    source.chars.zip(target.chars)
  end
end

# Counts how often each pair occurs.
#
# @param pairs [Array<Array(String, String)>]
# @return [Hash{Array(String, String) => Integer}] keyed in order of first
#   occurrence
def pair_frequencies(pairs)
  pairs.each_with_object(Hash.new(0)) { |pair, counts| counts[pair] += 1 }
end

if $PROGRAM_NAME == __FILE__
  corpus = ARGV.fetch(0, DEFAULT_CORPUS)

  begin
    lexicon = load_lexicon(corpus)
  rescue SystemCallError => e
    abort "#{$PROGRAM_NAME}: cannot read corpus: #{e.message}"
  end

  puts pair_frequencies(aligned_pairs(lexicon))
end