#!/usr/bin/env python
"""
unify.py -- for those days when you're banging your head
on the table and screaming "ARGH JUST MAKE THIS FILE UTF-8 KTHX!"

usage:

  python unify.py < input_file > output_file

limitations:
  - only works from stdin
  - reads all the content into memory; could be a problem with a lot
    of content
  - importing this module replaces sys.stdout with a UTF-8 encoding
    writer (see below)

But those limitations are nothing compared to those freaking

  UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 48:
  ordinal not in range(128)

And anyway, they can be programmed around, once you're familiar with the
UnicodeDammit class, which is manna from heaven.

As all functionality is stolen from these libraries, you'll be needing:

  http://www.crummy.com/software/BeautifulSoup/ - Leonard Richardson
  http://chardet.feedparser.org/ - Mark Pilgrim
"""

import sys

# Hard-coded, machine-specific search path (presumably where the author's
# copy of BeautifulSoup lives -- confirm before relying on it). It must be
# appended before the BeautifulSoup import below; kept so existing setups
# keep working.
sys.path.append("/home/pat/.pylib/")
from BeautifulSoup import UnicodeDammit

import codecs

# Wrap stdout so that unicode text written to it is encoded as UTF-8.
# NOTE: this runs at import time, so it also affects any program that
# imports this module; it is left at module level for backward
# compatibility.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)


def unify(text):
    """Return *text* converted to unicode by BeautifulSoup's UnicodeDammit.

    The value is whatever ``UnicodeDammit(text).unicode`` holds.

    NOTE(review): in BeautifulSoup 3 that attribute appears to be left as
    None when none of the candidate encodings can decode the input; verify
    against the installed version. Callers should be prepared for None.
    """
    return UnicodeDammit(text).unicode


def _main():
    """Convert everything on stdin to UTF-8 on stdout; return an exit status.

    The decoded text is written verbatim -- no trailing newline is added,
    so the output differs from the input only in its encoding.

    Returns 0 on success, or 1 (after a message on stderr) when unify()
    yields None instead of text.
    """
    content = unify(sys.stdin.read())
    if content is None:
        # Without this guard the literal text "None" would be written out
        # as if it were the converted document.
        sys.stderr.write("unify.py: could not decode input to unicode\n")
        return 1
    sys.stdout.write(content)
    return 0


if __name__ == "__main__":
    sys.exit(_main())