"""Scratch script: extract ``[[wiki links]]`` from sentences of two articles.

Downloads the French and English versions of one article, splits each into
sentences, and pairs one randomly chosen English sentence with the set of
wiki-style links found in it.
"""

import re
from random import choice

# NOTE(review): split_into_sentences is not imported explicitly anywhere in
# this file; presumably it is provided by this wildcard import -- confirm,
# then replace the wildcard with an explicit import.
from wikialign_outline import *
from wikialign import download_article

# Raw string so that the regex escapes (\[ \] \| \:) reach the regex engine
# untouched instead of being parsed as (invalid) string escape sequences,
# which emit a SyntaxWarning on recent Python versions.
# The pattern is compiled with re.VERBOSE, so whitespace outside character
# classes and the trailing "# ..." comments are ignored by the regex engine.
link_pattern = r"""\[\[     # [[
(                # title
[^\]\|\:]+       # several of anything but ]|:
)                # end title
\|?              # bar?
(                # anchor?
[^\]\[\|\:]+
)?               # end anchor?
\]\]             # ]]
"""
linkRE = re.compile(link_pattern, re.VERBOSE)

# NOTE(review): the variables are named "rennes_*" but the article requested
# is 'France'; the names are kept as-is because other code may refer to them.
# The arguments look like (language code, article title) -- confirm against
# download_article.
rennes_fr = download_article('fr', 'France')
rennes_en = download_article('en', 'France')

# Presumably sequences of sentence strings (``en`` is passed to
# random.choice below, which requires a non-empty sequence).
fr = split_into_sentences(rennes_fr)
en = split_into_sentences(rennes_en)


def links(sentence):
    """Return the set of wiki links found in *sentence*.

    Each element is a ``(title, anchor)`` tuple as produced by
    ``linkRE.findall``; ``anchor`` is the empty string when the optional
    second group did not take part in the match.
    """
    return set(linkRE.findall(sentence))


def linked(sentence):
    """Return ``(sentence, links(sentence))``."""
    return sentence, links(sentence)


# Pick one English sentence at random and pair it with its links.
# random.choice raises IndexError if ``en`` is empty.
e = choice(en)
e_linked = linked(e)