#!/usr/bin/env ruby1.9
# -*- coding: utf-8 -*-
require 'cgi'
require 'rubygems'
require 'hpricot'
require 'open-uri'

# Turns an article title into the path segment used in a Wikipedia URL.
#
# The title is CGI-escaped first; CGI.escape encodes each space as '+',
# and every '+' in the escaped result is then swapped for '_'.
# (A literal '+' in the title has already become '%2B' and is untouched.)
def wikipedia_article_path(title)
  escaped = CGI.escape(title)
  escaped.tr('+', '_')
end

# Builds the Special:Export URL for an article.
#
# language - subdomain placed in front of ".wikipedia.org" (e.g. 'en').
# title    - article title; converted with wikipedia_article_path.
#
# Returns the URL as a String.
def wiki_url(language, title)
  article_path = wikipedia_article_path(title)
  format('http://%s.wikipedia.org/wiki/Special:Export/%s', language, article_path)
end

# Downloads the Special:Export document for an article and returns the
# inner HTML of the elements Hpricot finds for the selector 'text'.
#
# code  - language subdomain handed to wiki_url (e.g. 'en').
# title - article title handed to wiki_url.
#
# Errors raised by open-uri while fetching (HTTP or network failures)
# are not rescued here and propagate to the caller.
def spider_wikipedia_text(code, title)
  url = wiki_url(code, title)
  # Go through URI#open (provided by open-uri) rather than Kernel#open:
  # Kernel#open stopped accepting URLs in Ruby 3.0, while URI#open works on
  # old and new interpreters alike. The block form closes the downloaded
  # stream as soon as Hpricot has parsed it.
  doc = URI.parse(url).open('User-Agent' => 'Innocentrobot') { |io| Hpricot(io) }
  doc.search('text').inner_html
end

class String
  # Splits the receiver at every period.
  #
  # The pattern wraps the period in a capture group, so each "." is kept
  # in the result as its own element between the surrounding pieces,
  # e.g. "A. B." => ["A", ".", " B", "."].
  def sentences
    period = /(\.)/
    split(period)
  end
end

# A Wikipedia article identified by a language code and a title.
# The article text is fetched when the object is constructed.
class WikipediaArticle
  attr_accessor :code, :title, :text, :sentences

  # code  - language code passed on to spider_wikipedia_text.
  # title - article title passed on to spider_wikipedia_text.
  #
  # Stores both arguments, fetches the text via spider_wikipedia_text and
  # keeps the result of String#sentences on that text.
  def initialize(code, title)
    @code, @title = code, title
    @text = spider_wikipedia_text(code, title)
    @sentences = @text.sentences
  end
end

# Demo run: fetch the 'House' article for language code 'en', then print
# its code, its title and the full text.
en = WikipediaArticle.new('en', 'House')
puts en.code
puts en.title
puts en.text

# Print every element returned by the article's sentence split on its own line.
en.sentences.each do |sentence|
  puts sentence
end


