#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build a small corpus of documents from ``udhr_*.txt`` files.

Running the module globs the current working directory for files matching
``udhr_*.txt``, prints the list of matches, and wraps them in a ``Corpus``.
"""

import os
from glob import glob


class Document(object):
    """A file on disk paired with a language name.

    Attributes are plain and public:
      filename -- the path given to the constructor, stored unchanged.
      langname -- the explicit ``langname`` argument when one was given,
                  otherwise whatever ``filename2code()`` returns.
    """

    def __init__(self, filename, langname=None):
        """Store ``filename`` and resolve the language name.

        filename -- path (or bare name) of the document.
        langname -- optional explicit language name; when left as ``None``
                    the name is derived from ``filename``.
        """
        self.filename = filename
        # Honour an explicitly supplied name; previously the argument was
        # accepted but silently discarded in favour of the derived value.
        if langname is None:
            langname = self.filename2code()
        self.langname = langname

    def filename2code(self):
        """Return the language code for this document.

        The base implementation has no naming scheme to decode, so it
        returns the filename unchanged. Subclasses override this.
        """
        return self.filename


class UDHRDocument(Document):
    """A document whose file is named ``udhr_<code>.txt``."""

    # Fixed parts of the file name that surround the language code.
    PREFIX = 'udhr_'
    SUFFIX = '.txt'

    def filename2code(self):
        """Return the ``<code>`` part of a ``udhr_<code>.txt`` file name.

        Only the final path component is considered, and the prefix and
        suffix are removed only from its start and end respectively, so
        directory names and occurrences of ``udhr_`` / ``.txt`` inside the
        code itself are left alone. A name lacking the prefix or suffix is
        returned with just the parts that are present removed.
        """
        name = os.path.basename(self.filename)
        if name.startswith(self.PREFIX):
            name = name[len(self.PREFIX):]
        if name.endswith(self.SUFFIX):
            name = name[:-len(self.SUFFIX)]
        return name


class Corpus(object):
    """A collection of ``UDHRDocument`` objects built from file paths.

    Attributes:
      filepaths -- the iterable of paths given to the constructor.
      docs      -- list of ``UDHRDocument`` instances, one per path, in the
                   same order as ``filepaths``.
    """

    def __init__(self, filepaths):
        """Remember ``filepaths`` and build the document list from them."""
        self.filepaths = filepaths
        self.docs = self.readfiles()

    def readfiles(self):
        """Return a new list with one ``UDHRDocument`` per stored path.

        Despite the name, no file is opened here: each document only
        records its path and derived language code.
        """
        return [UDHRDocument(path) for path in self.filepaths]


# Script driver. Kept at module level (not under a __main__ guard) so the
# names ``files`` and ``c`` remain available to anything importing this
# module, exactly as before.
files = glob('udhr_*.txt')
# Parenthesised form prints the same text on Python 2 and is valid Python 3.
print(files)
c = Corpus(files)