Merge pull request #120 from indentlabs/master

Use S3's static-public-assets bucket instead of louismullie.com
Make nokogiri a required dependency, #121
2017-05-05 18:31:43 -04:00 · 2017-04-22 16:15:35 -04:00 · 2016-05-24 14:04:48 -05:00 · 2016-05-24 13:20:17 -05:00 · 2016-05-24 13:19:51 -05:00 · 2015-12-07 21:17:40 -05:00
130 changed files with 3935 additions and 2664 deletions
--- a/.gitignore
+++ b/.gitignore
@ -13,4 +13,5 @@
 *.yaml
 spec/sandbox.rb
 coverage/*
 benchmark/*
 TODO
--- a/.rspec
+++ b/.rspec
@ -1,2 +1,2 @@
--format s -c
+--format d -c
 --order rand
--- a/.travis.yml
+++ b/.travis.yml
@ -1,11 +1,18 @@
 language: ruby
 rvm:
  - 1.9.2
  - 1.9.3
  - 2.0
  - 2.1
  - 2.2
 before_install:
  - export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386/"
-before_script: 
+
 before_script:
  - sudo apt-get install antiword
  - sudo apt-get install poppler-utils
  - rake treat:install[travis] --trace
-script: rake treat:spec --trace
+
 script: rake treat:spec --trace
--- a/.treat
+++ b/.treat
@ -0,0 +1,35 @@
 # A boolean value indicating whether to silence 
 # the output of external libraries (e.g. Stanford 
 # tools, Enju, LDA, Ruby-FANN, Schiphol).
 Treat.core.verbosity.silence = false
 # A boolean value indicating whether to explain 
 # the steps that Treat is performing.
 Treat.core.verbosity.debug = true
 # A boolean value indicating whether Treat should 
 # try to detect the language of newly input text.
 Treat.core.language.detect = false
 # A string representing the language to default 
 # to when detection is off.
 Treat.core.language.default = 'english'
 # A symbol representing the finest level at which
 # language detection should be performed if language
 # detection is turned on.
 Treat.core.language.detect_at = :document
 # The directory containing executables and JAR files.
 Treat.paths.bin = '##_INSTALLER_BIN_PATH_##'
 # The directory containing trained models
 Treat.paths.models = '##_INSTALLER_MODELS_PATH_##'
 # Mongo database configuration.
 Treat.databases.mongo.db = 'your_database'
 Treat.databases.mongo.host = 'localhost'
 Treat.databases.mongo.port = '27017'
 # Include the DSL by default.
 include Treat::Core::DSL
--- a/57
+++ b/57
@ -1,48 +1,45 @@
-source :rubygems
+source 'https://rubygems.org'
 gemspec
 gem 'birch'
 gem 'schiphol'
-gem 'sourcify'
+gem 'yomu'
 gem 'ruby-readability'
 gem 'nokogiri'
 group :test do
-  gem 'rspec', '2.9.0'
+  gem 'rspec'
  gem 'rake'
  gem 'terminal-table'
  gem 'simplecov'
 end
 =begin
 gem 'nokogiri'
 gem 'psych'
 gem 'mongoid'
 gem 'mongo'
 gem 'bson_ext'
 gem 'zip'
 gem 'ferret'
 gem 'lda-ruby'
 gem 'stanford-core-nlp'
 gem 'linguistics'
-gem 'ruby-readability'
+gem 'engtagger'
-gem 'whatlanguage'
+gem 'open-nlp'
-gem 'chronic'
+gem 'stanford-core-nlp'
-gem 'nickel'
+gem 'rwordnet'
 gem 'scalpel'
 gem 'fastimage'
 gem 'decisiontree'
-gem 'rb-libsvm'
+gem 'whatlanguage'
-gem 'ai4r'
+gem 'zip'
 gem 'nickel'
 gem 'tactful_tokenizer'
 gem 'srx-english'
 gem 'punkt-segmenter'
 gem 'chronic'
 gem 'uea-stemmer'
 gem 'rbtagger'
 gem 'ruby-stemmer'
 gem 'punkt-segmenter'
 gem 'tactful_tokenizer'
 gem 'nickel'
 gem 'rwordnet'
 gem 'uea-stemmer'
 gem 'engtagger'
 gem 'activesupport'
-gem 'srx-english'
+gem 'rb-libsvm'
-gem 'scalpel'
+gem 'tomz-liblinear-ruby-swig'
-=end
+gem 'ruby-fann'
-
+gem 'fuzzy-string-match'
-# english?
+gem 'levenshtein-ffi'
 gem 'tf-idf-similarity'
 gem 'kronic'
 =end
--- a/4
+++ b/4
@ -1,4 +1,4 @@
-Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.2
+Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 2.0.0
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 Author: Louis-Antoine Mullie (louis.mullie@gmail.com). Copyright 2011-12.
-Non-trivial amount of code has been incorporated and modified from other libraries:
+A non-trivial amount of code has been incorporated and modified from other libraries:
 - formatters/readers/odt.rb - Mark Watson (GPL license)
 - processors/tokenizers/tactful.rb - Matthew Bunday (GPL license)
--- a/README.md
+++ b/README.md
@ -1,35 +1,43 @@
 [![Build Status](https://secure.travis-ci.org/louismullie/treat.png)](http://travis-ci.org/#!/louismullie/treat)
-[![Dependency Status](https://gemnasium.com/louismullie/treat.png)](https://gemnasium.com/louismullie/treat)
+[![Code Climate](https://codeclimate.com/github/louismullie/treat.png)](https://codeclimate.com/github/louismullie/treat)
 [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/louismullie/treat)
-Treat is a toolkit for natural language processing and computational linguistics in Ruby. The Treat project aims to build a language- and algorithm- agnostic NLP framework for Ruby with support for tasks such as document retrieval, text chunking, segmentation and tokenization, natural language parsing, part-of-speech tagging, keyword extraction and named entity recognition.
+![Treat Logo](http://www.louismullie.com/treat/treat-logo.jpg)
-**Current features**
+**New in v2.0.5: [OpenNLP integration](https://github.com/louismullie/treat/commit/727a307af0c64747619531c3aa355535edbf4632) and [Yomu support](https://github.com/louismullie/treat/commit/e483b764e4847e48b39e91a77af8a8baa1a1d056)**
 Treat is a toolkit for natural language processing and computational linguistics in Ruby. The Treat project aims to build a language- and algorithm- agnostic NLP framework for Ruby with support for tasks such as document retrieval, text chunking, segmentation and tokenization, natural language parsing, part-of-speech tagging, keyword extraction and named entity recognition. Learn more by taking a [quick tour](https://github.com/louismullie/treat/wiki/Quick-Tour) or by reading the [manual](https://github.com/louismullie/treat/wiki/Manual).
 **Features**
 * Text extractors for PDF, HTML, XML, Word, AbiWord, OpenOffice and image formats (Ocropus).
 * Text retrieval with indexation and full-text search (Ferret).
 * Text chunkers, sentence segmenters, tokenizers, and parsers (Stanford & Enju).
 * Word inflectors, including stemmers, conjugators, declensors, and number inflection.
 * Lexical resources (WordNet interface, several POS taggers for English).
 * Language, date/time, topic words (LDA) and keyword (TF*IDF) extraction.
 * Word inflectors, including stemmers, conjugators, declensors, and number inflection.
 * Serialization of annotated entities to YAML, XML or to MongoDB.
 * Visualization in ASCII tree, directed graph (DOT) and tag-bracketed (standoff) formats.
 * Linguistic resources, including language detection and tag alignments for several treebanks.
-* Machine learning (decision tree, multilayer perceptron, linear, support vector machines).
+* Machine learning (decision tree, multilayer perceptron, LIBLINEAR, LIBSVM).
 * Text retrieval with indexation and full-text search (Ferret).
-<br>
+**Contributing**
-**Resources**
+I am actively seeking developers that can help maintain and expand this project. You can find a list of ideas for contributing to the project [here](https://github.com/louismullie/treat/wiki/Contributing).
-* Read the [latest documentation](http://rubydoc.info/github/louismullie/treat/frames).
+**Authors**
-* See how to [install Treat](https://github.com/louismullie/treat/wiki/Installation).
+
-* Learn how to [use Treat](https://github.com/louismullie/treat/wiki/Manual).
+Lead developer: @louismullie [[Twitter](https://twitter.com/LouisMullie)]
-* Help out by [contributing to the project](https://github.com/louismullie/treat/wiki/Contributing).
+
-* View a list of [papers](https://github.com/louismullie/treat/wiki/Papers) about tools included in this toolkit.
+Contributors:
-* Open an [issue](https://github.com/louismullie/treat/issues).
+
- 
+- @bdigital
-<br>
+- @automatedtendencies
 - @LeFnord
 - @darkphantum
 - @whistlerbrk
 - @smileart
 - @erol
 **License**
-This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
+This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
--- a/7
+++ b/7
@ -48,4 +48,9 @@ Treat - Text Retrieval, Extraction and Annotation Toolkit
  * Added LIBSVM and LIBLINEAR classifier support.
  * Added support for serialization of documents and data sets to MongoDB.
  * Added specs for most of the core classes.
-  * Several bug fixes.
+  * Several bug fixes.
 2.0.0rc1
  * MAJOR CHANGE: the old DSL is no longer supported. A new DSL style using
    lowercase keywords is now used and must be required explicitly.
--- a/16
+++ b/16
@ -40,20 +40,8 @@ namespace :treat do
  task :spec, [:language] do |t, args|
    require_relative 'spec/helper'
    Treat::Specs::Helper.start_coverage
-    Treat::Specs::Helper.run_core_specs
+    Treat::Specs::Helper.run_library_specs
-    Treat::Specs::Helper.run_examples_as(
+    Treat::Specs::Helper.run_language_specs(args.language)
    'spec', args.language)
  end
  # Runs worker benchmarks for all languages (by 
  # default), or for a specific language (if supplied).
  # Also outputs an HTML table 
  # Syntax: rake treat:benchmark (all languages)
  # - OR -  rake treat:benchmark[language]
  task :benchmark, [:language] do |t, args|
    require_relative 'spec/helper'
    Treat::Specs::Helper.run_examples_as(
    'benchmark', args.language)
  end
 end
--- a/lib/treat.rb
+++ b/lib/treat.rb
@ -18,5 +18,6 @@ module Treat
  require_relative 'treat/exception'
  require_relative 'treat/autoload'
  require_relative 'treat/modules'
  require_relative 'treat/builder'
 end
--- a/lib/treat/autoload.rb
+++ b/lib/treat/autoload.rb
@ -14,21 +14,31 @@ module Treat::Autoload
  # Loads all the files for the base
  # module in the appropriate order.
  def self.included(base)
-    # Get the parts of module name.
+    m = self.get_module_name(base)
-    bits = base.to_s.split('::')
+    d = self.get_module_path(m)
-    # Singularize the module name.
+    n = self.singularize(m) + '.rb'
-    w = bits[-1].downcase
+    f, p = File.join(d, n), "#{d}/*.rb"
-    n = (w[-3..-1] == 'ies' ? 
+    require f if File.readable?(f)
-    (w[0..-4] + 'y') : (w[-1] == 
+    Dir.glob(p).each { |f| require f }
-    's' ? w[0...-1] : w)) + '.rb'
+  end
-    # Get the module's directory.
+
-    d = File.dirname(File.
+  # Returns the path to a module's dir.
-    expand_path(__FILE__))[0..-6] +  
+  def self.get_module_path(name)
-    bits.join('/').downcase + '/'
+    file = File.expand_path(__FILE__)
-    # Require base class if exists.
+    dirs = File.dirname(file).split('/')
-    require d + n if File.readable?(d + n)
+    File.join(*dirs[0..-1], name)
-    # Require all other files in dir.
+  end
-    Dir.glob("#{d}*.rb").each { |f| require f }
+  
  # Takes the final component of a module's
  # fully-qualified name and downcases it
  # (e.g. Treat::Entities gives "entities").
  def self.get_module_name(mod)
    last_part = mod.to_s.split('::').last
    last_part.downcase
  end
  # Naive singularization helper: a trailing "ies"
  # becomes "y", otherwise one trailing "s" is
  # dropped; any other word is returned as is.
  def self.singularize(w)
    return w[0..-4] + 'y' if w[-3..-1] == 'ies'
    return w unless w[-1] == 's'
    w[0..-2]
  end
 end
--- a/lib/treat/builder.rb
+++ b/lib/treat/builder.rb
@ -0,0 +1,6 @@
 # Runs a block of code inside a fresh object that
 # has Treat::Core::DSL mixed in, so that any instance
 # methods that module provides can be called from
 # within the block without a receiver.
 class Treat::Builder
  include Treat::Core::DSL
  # block - the code to execute; it is run through
  # instance_exec, i.e. with the new instance as self.
  def initialize(&block)
    instance_exec(&block)
  end
 end
--- a/lib/treat/config/config.rb
+++ b/lib/treat/config/config.rb
@ -3,71 +3,36 @@
 # the /config folder.
 module Treat::Config
  # Require importable mix in.
  require_relative 'importable'
  # Make all configuration importable.
  extend Treat::Config::Importable
  # Core configuration options for entities.
  class Treat::Config::Entities; end
  # Configuration for paths to models, binaries,
  # temporary storage and file downloads.
  class Treat::Config::Paths; end
  # Configuration for all Treat workers.
  class Treat::Config::Workers; end
  # Helpful linguistic options.
  class Treat::Config::Linguistics; end
  # Supported workers for each language.
  class Treat::Config::Languages; end
  # Configuration options for external libraries.
  class Treat::Config::Libraries; end
  class Treat::Config::Workers; end
  # Configuration options for database 
  # connectivity (host, port, etc.)
  class Treat::Config::Databases; end
  # Configuration options for Treat core.
  class Treat::Config::Core; end
  # Require configurable mix in.
  require_relative 'configurable'
  # Store all the configuration in self.config
  class << self; attr_accessor :config; end
  # Setup a proxy on the main Treat module to 
  # make configuration options directly accessible,
  # using e.g. Treat.paths.tmp = '...'
  Treat.module_eval do
    # Handle all missing methods as conf options: the
    # name of any unknown method called on Treat (e.g.
    # Treat.paths) is used as the key for a lookup in
    # Treat::Config.config.
    # Instead, should dynamically define them. FIXME.
    #
    # sym   - name of the missing method (the lookup key).
    # args  - call arguments; only forwarded to super.
    # block - call block; only forwarded to super.
    def self.method_missing(sym, *args, &block)
      # :to_ary is handed to the inherited handler
      # instead of being treated as a config option.
      super(sym, *args, &block) if sym == :to_ary
      Treat::Config.config[sym]
    end
  end
  # Main entry point; builds the whole configuration.
  # Every constant of Treat::Config except Configurable
  # is extended with Treat::Config::Configurable and
  # asked to configure itself; the resulting options
  # are stored under the lowercased constant name and
  # the collected hash is converted to nested structs
  # before being assigned to self.config.
  def self.configure!
    settings = {}
    names = Treat::Config.constants.reject { |c| c == :Configurable }
    names.each do |const|
      klass = Treat::Config.const_get(const)
      klass.class_eval { extend Treat::Config::Configurable }
      key = const.to_s.downcase.intern
      klass.configure!
      settings[key] = klass.config
    end
    self.config = self.hash_to_struct(settings)
  end
  # * Helper methods * #
  # Recursively turns a hash into nested Struct
  # instances, so that options can be read through
  # accessors (e.g. config.a.b). A hash that has at
  # least one non-Symbol key is returned unchanged.
  def self.hash_to_struct(hash)
    return hash unless hash.keys.all? { |key| key.is_a?(Symbol) }
    # Convert nested hashes first, then build the
    # struct from the already-converted values.
    members = hash.values.map do |value|
      value.is_a?(Hash) ? self.hash_to_struct(value) : value
    end
    Struct.new(*hash.keys).new(*members)
  end
 end
--- a/lib/treat/config/configurable.rb
+++ b/lib/treat/config/configurable.rb
@ -1,10 +1,29 @@
 # Provide default functionality to load configuration
 # options from flat files into their respective modules.
 module Treat::Config::Configurable
-
+  
  # Hook called when a class extends this module:
  # defines a .config reader/writer pair on the
  # extending class itself and initializes it to
  # an empty hash.
  def self.extended(base)
    base.singleton_class.send(:attr_accessor, :config)
    base.class_eval { |target| target.config = {} }
  end
  # Provide base functionality to configure 
  # all modules. The behaviour is as follows:
  # 
  # 1 - Check if a file named data/$CLASS$.rb
  # exists; if so, load that file as the base 
  # configuration, i.e. "Treat.$CLASS$"; e.g. 
  # "Treat.core"
  # 
  # 2 - Check if a folder named data/$CLASS$
  # exists; if so, load each file in that folder
  # as a suboption of the main configuration,
  # i.e. "Treat.$CLASS$.$FILE$"; e.g. "Treat.workers"
  # 
  # (where $CLASS$ is the lowercase name of 
  # the concrete class being extended by this.)
  def configure!
    path = File.dirname(File.expand_path(         # FIXME
    __FILE__)).split('/')[0..-4].join('/') + '/'
@ -14,15 +33,19 @@ module Treat::Config::Configurable
    base_file = main_dir + mod_name + '.rb'
    if File.readable?(base_file)
      self.config = eval(File.read(base_file))
-    end
+    elsif FileTest.directory?(conf_dir)
-    if FileTest.directory?(conf_dir)
+      self.config = self.from_dir(conf_dir)
-      config = {}
+    else; raise Treat::Exception,
-      Dir[conf_dir + '/*'].each do |path|
+      "No config file found for #{mod_name}."
        name = File.basename(path, '.*').intern
        config[name] = eval(File.read(path))
      end
      self.config = config
    end
  end
  # * Helper methods for configuration * #
  # Builds a hash with one entry per file found
  # directly inside conf_dir: the key is the file's
  # base name (extension removed) as a symbol, the
  # value is the result of evaluating its contents.
  #
  # NOTE(review): the file contents are passed to
  # eval, so conf_dir must only hold trusted files.
  def from_dir(conf_dir)
    Dir[conf_dir + '/*'].each_with_object({}) do |path, loaded|
      name = File.basename(path, '.*').intern
      loaded[name] = eval(File.read(path))
    end
  end
 end
--- a/lib/treat/config/data/config.rb
+++ b/lib/treat/config/data/config.rb
@ -1,50 +0,0 @@
 # Core configuration data, written as one hash
 # literal: acronyms, encodings, the entity list and
 # order, language defaults, path descriptions, and
 # syntax and verbosity flags.
 {acronyms:
  ['xml', 'html', 'txt', 'odt',
  'abw', 'doc', 'yaml', 'uea',
  'lda', 'pdf', 'ptb', 'dot',
  'ai', 'id3', 'svo', 'mlp',
  'svm', 'srx'],
 # Maps a language name to an encoding name.
 encodings:
  {language_to_code: {
    arabic: 'UTF-8',
    chinese: 'GB18030',
    english: 'UTF-8',
    french: 'ISO_8859-1',
    german: 'ISO_8859-1', # key was misspelled "ferman"
    hebrew: 'UTF-8'
 }},
 entities:
    {list:
      [:entity, :unknown, :email,
       :url, :symbol, :sentence,
       :punctuation, :number,
       :enclitic, :word, :token,
       :fragment, :phrase, :paragraph,
       :title, :zone, :list, :block,
       :page, :section, :collection,
       :document],
    order:
      [:token, :fragment, :phrase,
       :sentence, :zone, :section,
       :document, :collection]},
    language: {
      default: :english,
      detect: false,
      detect_at: :document
    },
    paths: {
      description: {
        tmp: 'temporary files',
        lib: 'class and module definitions',
        bin: 'binary files',
        files: 'user-saved files',
        models: 'model files',
        spec: 'spec test files'
      }
    },
 syntax: { sweetened: false },
 verbosity: { debug: false, silence: true}}
--- a/lib/treat/config/data/core.rb
+++ b/lib/treat/config/data/core.rb
@ -4,7 +4,7 @@
    'abw', 'doc', 'yaml', 'uea',
    'lda', 'pdf', 'ptb', 'dot',
    'ai', 'id3', 'svo', 'mlp',
-    'svm', 'srx'],
+    'svm', 'srx', 'nlp'],
  encodings: 
    {language_to_code: {
@ -21,13 +21,13 @@
        [:entity, :unknown, :email, 
         :url, :symbol, :sentence, 
         :punctuation, :number, 
-         :enclitic, :word, :token, 
+         :enclitic, :word, :token, :group,
         :fragment, :phrase, :paragraph, 
         :title, :zone, :list, :block, 
         :page, :section, :collection, 
         :document],
      order: 
-        [:token, :fragment, :phrase, 
+        [:token, :fragment, :group, 
         :sentence, :zone, :section, 
         :document, :collection]},
      language: {
@ -45,7 +45,9 @@
          spec: 'spec test files'
        }
      },
-    
+  learning: {
    list: [:data_set, :export, :feature, :tag, :problem, :question]
  },
  syntax: { sweetened: false },
  verbosity: { debug: false, silence: true}
--- a/lib/treat/config/data/languages/agnostic.rb
+++ b/lib/treat/config/data/languages/agnostic.rb
@ -1,21 +1,12 @@
 {
  dependencies: [
-    'psych',
+    'ferret', 'bson_ext', 'mongo', 'lda-ruby',
-    'nokogiri',
+    'stanford-core-nlp', 'linguistics',
-    'ferret',
+    'ruby-readability', 'whatlanguage',
-    'bson_ext',
+    'chronic', 'kronic', 'nickel', 'decisiontree',
-    'mongo',
+    'rb-libsvm', 'ruby-fann', 'zip', 'loggability',
-    'lda-ruby',
+    'tf-idf-similarity', 'narray', 'fastimage',
-    'stanford-core-nlp',
+    'fuzzy-string-match', 'levenshtein-ffi'
    'linguistics',
    'ruby-readability',
    'whatlanguage',
    'chronic',
    'nickel',
    'decisiontree',
    'rb-libsvm',
    'ai4r',
    'zip'
  ],
  workers: {
    learners: {
@ -25,7 +16,9 @@
      keywords: [:tf_idf],
      language: [:what_language],
      topic_words: [:lda],
-      tf_idf: [:native]
+      tf_idf: [:native],
      distance: [:levenshtein],
      similarity: [:jaro_winkler, :tf_idf]
    },
    formatters: {
      serializers: [:xml, :yaml, :mongo],
--- a/lib/treat/config/data/languages/english.rb
+++ b/lib/treat/config/data/languages/english.rb
@ -14,7 +14,7 @@
  ],
  workers: {
    extractors: {
-      time: [:chronic, :ruby, :nickel],
+      time: [:chronic, :kronic, :ruby, :nickel],
      topics: [:reuters],
      name_tag: [:stanford]
    },
@ -32,28 +32,64 @@
    },
    processors: {
      parsers: [:stanford],
-      segmenters: [:srx, :tactful, :punkt, :stanford, :scalpel],
+      segmenters: [:scalpel, :srx, :tactful, :punkt, :stanford],
-      tokenizers: [:ptb, :stanford, :punkt]
+      tokenizers: [:ptb, :stanford, :punkt, :open_nlp]
    }
  },
  stop_words:
-    ['the', 'of', 'and', 'a', 'to', 'in', 'is',
+    [
-    'you', 'that', 'it', 'he', 'was', 'for', 'on',
+      "about",
-    'are', 'as', 'with', 'his', 'they', 'I', 'at',
+      "also",
-    'be', 'this', 'have', 'from', 'or', 'one', 'had',
+      "are",
-    'by', 'word', 'but', 'not', 'what', 'all', 'were',
+      "away",
-    'we', 'when', 'your', 'can', 'said', 'there', 'use',
+      "because",
-    'an', 'each', 'which', 'she', 'do', 'how', 'their',
+      "been",
-    'if', 'will', 'up', 'other', 'about', 'out', 'many',
+      "beside",
-    'then', 'them', 'these', 'so', 'some', 'her', 'would',
+      "besides",
-    'make', 'like', 'him', 'into', 'time', 'has', 'look',
+      "between",
-    'two', 'more', 'write', 'go', 'see', 'number', 'no',
+      "but",
-    'way', 'could', 'people', 'my', 'than', 'first', 'been',
+      "cannot",
-    'call', 'who', 'its', 'now', 'find', 'long', 'down',
+      "could",
-    'day', 'did', 'get', 'come', 'made', 'may', 'part',
+      "did",
-    'say', 'also', 'new', 'much', 'should', 'still',
+      "etc",
-    'such', 'before', 'after', 'other', 'then', 'over',
+      "even",
-    'under', 'therefore', 'nonetheless', 'thereafter',
+      "ever",
-    'afterwards', 'here', 'huh', 'hah', "n't", "'t", 'here',
+      "every",
-    'neither', 'towards']
+      "for",
      "had",
      "have",
      "how",
      "into",
      "isn",
      "maybe",
      "non",
      "nor",
      "now",
      "should",
      "such",
      "than",
      "that",
      "then",
      "these",
      "this",
      "those",
      "though",
      "too",
      "was",
      "wasn",
      "were",
      "what",
      "when",
      "where",
      "which",
      "while",
      "who",
      "whom",
      "whose",
      "will",
      "with",
      "would",
      "wouldn",
      "yes"
    ]
 }
--- a/lib/treat/config/data/languages/french.rb
+++ b/lib/treat/config/data/languages/french.rb
@ -6,13 +6,143 @@
  ],
  workers: {
    processors: {
-      segmenters: [:punkt],
+      segmenters: [:scalpel],
-      tokenizers: [],
+      tokenizers: [:ptb,:stanford],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
-  }
+  },
  stop_words:
    [
      "ailleurs",
      "ainsi",
      "alors",
      "aucun",
      "aucune",
      "auquel",
      "aurai",
      "auras",
      "aurez",
      "aurons",
      "auront",
      "aussi",
      "autre",
      "autres",
      "aux",
      "auxquelles",
      "auxquels",
      "avaient",
      "avais",
      "avait",
      "avec",
      "avez",
      "aviez",
      "avoir",
      "avons",
      "celui",
      "cependant",
      "certaine",
      "certaines",
      "certains",
      "ces",
      "cet",
      "cette",
      "ceux",
      "chacun",
      "chacune",
      "chaque",
      "comme",
      "constamment",
      "davantage",
      "depuis",
      "des",
      "desquelles",
      "desquels",
      "dessous",
      "dessus",
      "donc",
      "dont",
      "duquel",
      "egalement",
      "elles",
      "encore",
      "enfin",
      "ensuite",
      "etaient",
      "etais",
      "etait",
      "etes",
      "etiez",
      "etions",
      "etre",
      "eux",
      "guere",
      "ici",
      "ils",
      "jamais",
      "jusqu",
      "laquelle",
      "legerement",
      "lequel",
      "les",
      "lesquelles",
      "lesquels",
      "leur",
      "leurs",
      "lors",
      "lui",
      "maintenant",
      "mais",
      "malgre",
      "moi",
      "moins",
      "notamment",
      "parce",
      "plupart",
      "pourtant",
      "presentement",
      "presque",
      "puis",
      "puisque",
      "quand",
      "quant",
      "que",
      "quel",
      "quelqu",
      "quelque",
      "quelques",
      "qui",
      "quoi",
      "quoique",
      "rien",
      "selon",
      "serai",
      "seras",
      "serez",
      "serons",
      "seront",
      "soient",
      "soit",
      "sommes",
      "sont",
      "sous",
      "suis",
      "telle",
      "telles",
      "tels",
      "toi",
      "toujours",
      "tout",
      "toutes",
      "tres",
      "trop",
      "une",
      "vos",
      "votre",
      "vous"
    ]
 }
--- a/lib/treat/config/data/languages/german.rb
+++ b/lib/treat/config/data/languages/german.rb
@ -1,3 +1,5 @@
 #encoding: UTF-8
 {
  dependencies: [
    'punkt-segmenter', 
@ -6,13 +8,130 @@
  ],
  workers: {
    processors: {
-      segmenters: [:punkt],
+      segmenters: [:tactful, :punkt, :stanford, :scalpel],
-      tokenizers: [],
+      tokenizers: [:stanford, :punkt],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
-  }
+  },
-}
+  stop_words:
    [
      "alle",
      "allem",
      "alles",
      "andere",
      "anderem",
      "anderen",
      "anderer",
      "anderes",
      "auf",
      "bei",
      "beim",
      "bist",
      "dadurch",
      "dein",
      "deine",
      "deiner",
      "deines",
      "deins",
      "dem",
      "denen",
      "der",
      "deren",
      "des",
      "deshalb",
      "dessen",
      "diese",
      "diesem",
      "diesen",
      "dieser",
      "dieses",
      "ein",
      "eine",
      "einem",
      "einen",
      "einer",
      "eines",
      "euer",
      "euere",
      "eueren",
      "eueres",
      "für",
      "haben",
      "habt",
      "hatte",
      "hatten",
      "hattest",
      "hattet",
      "hierzu",
      "hinter",
      "ich",
      "ihr",
      "ihre",
      "ihren",
      "ihrer",
      "ihres",
      "indem",
      "ist",
      "jede",
      "jedem",
      "jeden",
      "jeder",
      "jedes",
      "kann",
      "kannst",
      "können",
      "könnt",
      "konnte",
      "konnten",
      "konntest",
      "konntet",
      "mehr",
      "mein",
      "meine",
      "meiner",
      "meines",
      "meins",
      "nach",
      "neben",
      "nicht",
      "nichts",
      "seid",
      "sein",
      "seine",
      "seiner",
      "seines",
      "seins",
      "sie",
      "sind",
      "über",
      "und",
      "uns",
      "unser",
      "unsere",
      "unter",
      "vor",
      "warst",
      "weil",
      "wenn",
      "werde",
      "werden",
      "werdet",
      "willst",
      "wir",
      "wird",
      "wirst",
      "wollen",
      "wollt",
      "wollte",
      "wollten",
      "wolltest",
      "wolltet",
      "zum",
      "zur"
    ]
 }
--- a/lib/treat/config/data/languages/italian.rb
+++ b/lib/treat/config/data/languages/italian.rb
@ -8,5 +8,155 @@
      segmenters: [:punkt],
      tokenizers: []
    }
-  }
+  },
-}
+  stop_words:
    [
      "affinche",
      "alcun",
      "alcuna",
      "alcune",
      "alcuni",
      "alcuno",
      "allora",
      "altra",
      "altre",
      "altri",
      "altro",
      "anziche",
      "certa",
      "certe",
      "certi",
      "certo",
      "che",
      "chi",
      "chiunque",
      "comunque",
      "con",
      "cosa",
      "cose",
      "cui",
      "dagli",
      "dai",
      "dall",
      "dalla",
      "dalle",
      "darsi",
      "degli",
      "del",
      "dell",
      "della",
      "delle",
      "dello",
      "dunque",
      "egli",
      "eppure",
      "esse",
      "essi",
      "forse",
      "gia",
      "infatti",
      "inoltre",
      "invece",
      "lui",
      "malgrado",
      "mediante",
      "meno",
      "mentre",
      "mie",
      "miei",
      "mio",
      "modo",
      "molta",
      "molte",
      "molti",
      "molto",
      "negli",
      "nel",
      "nella",
      "nelle",
      "nessun",
      "nessuna",
      "nessuno",
      "niente",
      "noi",
      "nostra",
      "nostre",
      "nostri",
      "nostro",
      "nulla",
      "occorre",
      "ogni",
      "ognuno",
      "oltre",
      "oltretutto",
      "oppure",
      "ovunque",
      "ovvio",
      "percio",
      "pertanto",
      "piu",
      "piuttosto",
      "poca",
      "poco",
      "poiche",
      "propri",
      "proprie",
      "proprio",
      "puo",
      "qua",
      "qual",
      "qualche",
      "qualcuna",
      "qualcuno",
      "quale",
      "quali",
      "qualunque",
      "quando",
      "quant",
      "quante",
      "quanti",
      "quanto",
      "quantunque",
      "quegli",
      "quei",
      "quest",
      "questa",
      "queste",
      "questi",
      "questo",
      "qui",
      "quindi",
      "sebbene",
      "sembra",
      "sempre",
      "senza",
      "soltanto",
      "stessa",
      "stesse",
      "stessi",
      "stesso",
      "sugli",
      "sui",
      "sul",
      "sull",
      "sulla",
      "sulle",
      "suo",
      "suoi",
      "taluni",
      "taluno",
      "tanta",
      "tanti",
      "tanto",
      "tra",
      "tuo",
      "tuoi",
      "tutt",
      "tutta",
      "tutte",
      "tutto",
      "una",
      "uno",
      "voi"
    ]
 }
--- a/lib/treat/config/data/languages/spanish.rb
+++ b/lib/treat/config/data/languages/spanish.rb
@ -8,5 +8,284 @@
      segmenters: [:punkt],
      tokenizers: []
    }
-  }
+  },
  stop_words:
    [
      "abans",
      "aca",
      "acerca",
      "ahora",
      "aixo",
      "algo",
      "algu",
      "alguien",
      "algun",
      "alguna",
      "algunas",
      "algunes",
      "alguno",
      "algunos",
      "alguns",
      "alla",
      "alli",
      "allo",
      "altra",
      "altre",
      "altres",
      "amb",
      "amunt",
      "antes",
      "aquel",
      "aquell",
      "aquella",
      "aquellas",
      "aquelles",
      "aquellos",
      "aquells",
      "aquest",
      "aquesta",
      "aquestes",
      "aquests",
      "aqui",
      "asimismo",
      "aun",
      "aunque",
      "avall",
      "cada",
      "casi",
      "com",
      "como",
      "con",
      "cosas",
      "coses",
      "cual",
      "cuales",
      "cualquier",
      "cuando",
      "damunt",
      "darrera",
      "davant",
      "debe",
      "deben",
      "deber",
      "debia",
      "debian",
      "decia",
      "decian",
      "decir",
      "deia",
      "deien",
      "del",
      "demasiado",
      "des",
      "desde",
      "despues",
      "dicen",
      "diciendo",
      "dins",
      "dir",
      "diu",
      "diuen",
      "doncs",
      "ell",
      "ellas",
      "elles",
      "ells",
      "els",
      "encara",
      "entonces",
      "ese",
      "esos",
      "esser",
      "esta",
      "estan",
      "estando",
      "estant",
      "estar",
      "estaria",
      "estarian",
      "estarien",
      "estas",
      "estos",
      "farien",
      "feia",
      "feien",
      "fent",
      "fue",
      "fueron",
      "gaire",
      "gairebe",
      "hace",
      "hacia",
      "hacian",
      "haciendo",
      "haran",
      "hauria",
      "haurien",
      "hemos",
      "hola",
      "junto",
      "lejos",
      "les",
      "lloc",
      "los",
      "menos",
      "menys",
      "meva",
      "mias",
      "mio",
      "misma",
      "mismas",
      "mismo",
      "mismos",
      "molt",
      "molta",
      "moltes",
      "mon",
      "mucha",
      "mucho",
      "muy",
      "nadie",
      "ningu",
      "nomes",
      "nosaltres",
      "nosotros",
      "nostra",
      "nostre",
      "nuestra",
      "nuestras",
      "nuestro",
      "nuestros",
      "nunca",
      "otra",
      "pasa",
      "pasan",
      "pasara",
      "pasaria",
      "passara",
      "passaria",
      "passen",
      "perque",
      "poc",
      "pocas",
      "pocos",
      "podem",
      "poden",
      "podeu",
      "podria",
      "podrian",
      "podrien",
      "poques",
      "porque",
      "potser",
      "puc",
      "pudieron",
      "pudo",
      "puede",
      "pueden",
      "puesto",
      "qualsevol",
      "quan",
      "que",
      "queria",
      "querian",
      "qui",
      "quien",
      "quienes",
      "quiere",
      "quieren",
      "quin",
      "quina",
      "quines",
      "quins",
      "quizas",
      "segueent",
      "segun",
      "sempre",
      "seran",
      "seria",
      "serian",
      "seu",
      "seva",
      "sido",
      "siempre",
      "siendo",
      "siguiente",
      "sino",
      "sobretodo",
      "solamente",
      "sovint",
      "suya",
      "suyas",
      "suyo",
      "suyos",
      "tambe",
      "tambien",
      "tanmateix",
      "tanta",
      "tanto",
      "tendran",
      "tendria",
      "tendrian",
      "tenen",
      "teu",
      "teva",
      "tiene",
      "tienen",
      "tindran",
      "tindria",
      "tindrien",
      "toda",
      "todavia",
      "todo",
      "tota",
      "totes",
      "tras",
      "traves",
      "tuvieron",
      "tuvo",
      "tuya",
      "tuyas",
      "tuyo",
      "tuyos",
      "unas",
      "unes",
      "unos",
      "uns",
      "usaba",
      "usaban",
      "usada",
      "usades",
      "usado",
      "usan",
      "usando",
      "usant",
      "usar",
      "usat",
      "usava",
      "usaven",
      "usen",
      "vaig",
      "varem",
      "varen",
      "vareu",
      "vegada",
      "vegades",
      "vez",
      "volem",
      "volen",
      "voleu",
      "vora",
      "vos",
      "vosaltres",
      "vosotros",
      "vostra",
      "vostre",
      "voy",
      "vuestra",
      "vuestras",
      "vuestro",
      "vuestros",
      "vull"
    ]
 }
--- a/lib/treat/config/data/languages/swedish.rb
+++ b/lib/treat/config/data/languages/swedish.rb
@ -8,5 +8,282 @@
      segmenters: [:punkt],
      tokenizers: []
    }
-  }
+  },
-}
+  stop_words:
    [
      "atminstone",
      "an",
      "anda",
      "aven",
      "aldrig",
      "alla",
      "alls",
      "allt",
      "alltid",
      "allting",
      "alltsa",
      "andra",
      "annan",
      "annars",
      "antingen",
      "att",
      "bakom",
      "bland",
      "blev",
      "bli",
      "bliva",
      "blivit",
      "bort",
      "bortom",
      "bredvid",
      "dar",
      "darav",
      "darefter",
      "darfor",
      "dari",
      "darigenom",
      "darvid",
      "dedar",
      "definitivt",
      "del",
      "den",
      "dendar",
      "denhar",
      "denna",
      "deras",
      "dessa",
      "dessutom",
      "desto",
      "det",
      "detta",
      "dylik",
      "efterat",
      "efter",
      "eftersom",
      "eller",
      "emellertid",
      "enbart",
      "endast",
      "enligt",
      "ens",
      "ensam",
      "envar",
      "eran",
      "etc",
      "ett",
      "exakt",
      "fatt",
      "fastan",
      "fick",
      "fler",
      "flera",
      "foljande",
      "foljde",
      "foljer",
      "for",
      "fore",
      "forhoppningsvis",
      "formodligen",
      "forr",
      "forra",
      "forutom",
      "forvisso",
      "fran",
      "framfor",
      "fullstandigt",
      "gang",
      "gar",
      "gatt",
      "ganska",
      "gav",
      "genom",
      "genomgaende",
      "ger",
      "gick",
      "gjorde",
      "gjort",
      "gor",
      "hade",
      "har",
      "harav",
      "har",
      "hej",
      "hela",
      "helst",
      "helt",
      "hitta",
      "hon",
      "honom",
      "hur",
      "huruvida",
      "huvudsakligen",
      "ibland",
      "icke",
      "ickedestomindre",
      "igen",
      "ihop",
      "inat",
      "ingen",
      "ingenstans",
      "inget",
      "innan",
      "innehalla",
      "inre",
      "inte",
      "inuti",
      "istaellet",
      "kanske",
      "klart",
      "knappast",
      "knappt",
      "kom",
      "komma",
      "kommer",
      "kraver",
      "kunde",
      "kunna",
      "lata",
      "later",
      "lagga",
      "langre",
      "laet",
      "lagd",
      "leta",
      "letar",
      "manga",
      "maste",
      "med",
      "medan",
      "medans",
      "mellan",
      "mest",
      "min",
      "mindre",
      "minst",
      "mittemellan",
      "motsvarande",
      "mycket",
      "nagon",
      "nagongang",
      "nagonsin",
      "nagonstans",
      "nagonting",
      "nagorlunda",
      "nagot",
      "namligen",
      "nar",
      "nara",
      "nasta",
      "nastan",
      "nedat",
      "nedanfor",
      "nerat",
      "ner",
      "nog",
      "normalt",
      "nummer",
      "nuvarande",
      "nytt",
      "oavsett",
      "och",
      "ocksa",
      "oppna",
      "over",
      "overallt",
      "ofta",
      "okej",
      "olika",
      "ovanfor",
      "ratt",
      "redan",
      "relativt",
      "respektive",
      "rimlig",
      "rimligen",
      "rimligt",
      "salunda",
      "savida",
      "saga",
      "sager",
      "sakert",
      "sand",
      "sarskilt",
      "satt",
      "sak",
      "samma",
      "samtliga",
      "sedd",
      "senare",
      "senaste",
      "ser",
      "sig",
      "sista",
      "sjaelv",
      "ska",
      "skall",
      "skickad",
      "skriva",
      "skulle",
      "snabb",
      "snarare",
      "snart",
      "som",
      "somliga",
      "speciellt",
      "stalla",
      "stallet",
      "starta",
      "strax",
      "stundom",
      "tackar",
      "tanka",
      "taga",
      "tagen",
      "tala",
      "tanke",
      "tidigare",
      "tills",
      "tog",
      "totalt",
      "trolig",
      "troligen",
      "tvaers",
      "tvars",
      "tycka",
      "tyckte",
      "tyvarr",
      "understundom",
      "upp",
      "uppenbarligen",
      "uppenbart",
      "utan",
      "utanfor",
      "uteslutande",
      "utom",
      "var",
      "varan",
      "vad",
      "val",
      "varde",
      "vanlig",
      "vanligen",
      "var",
      "vare",
      "varenda",
      "varfor",
      "varifran",
      "varit",
      "varje",
      "varken",
      "vars",
      "vart",
      "vem",
      "verkligen",
      "vidare",
      "vilken",
      "vill",
      "visar",
      "visst",
      "visste"
    ]
 }
--- a/lib/treat/config/data/libraries.rb
+++ b/lib/treat/config/data/libraries.rb
@ -8,5 +8,9 @@
  stanford: {
    jar_path: nil, 
    model_path: nil
  },
  open_nlp: {
    jar_path: nil,
    model_path: nil
  }
 }
--- a/lib/treat/config/data/tags.rb
+++ b/lib/treat/config/data/tags.rb
@ -24,8 +24,9 @@
      'Coordinated phrase', ['', '', 'UCP', '', '', 'COORD'],
      'Infinitival phrase', ['', '', '', '', '', 'VPinf'],
      'Verb phrase', ['', '', 'VP', '', '', ''],
      'Inverted yes/no question', ['', '', 'SQ', '', '', ''],
      'Wh adjective phrase', ['', '', 'WHADJP', '', '', ''],
-      'Wh adverb phrase', ['', '', 'WHAVP', '', '', ''],
+      'Wh adverb phrase', ['', '', 'WHADVP', '', '', ''],
      'Wh noun phrase', ['', '', 'WHNP', '', '', ''],
      'Wh prepositional phrase', ['', '', 'WHPP', '', '', ''],
      'Unknown', ['', '', 'X', '', '', ''],
@ -100,7 +101,7 @@
      'Pronoun, reflexive', ['PNX', 'PPL', 'PRP', 'PRF'],
      'Pronoun, reflexive, plural', ['PNX', 'PPLS', 'PRP', 'PRF'],
      'Pronoun, question, subject', ['PNQ', 'WPS', 'WP', 'PWAV'],
-      'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'],  # Hack
+      'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'],  # FIXME
      'Pronoun, question, object', ['PNQ', 'WPO', 'WP', 'PWAV', 'PWAT'],
      'Pronoun, existential there', ['EX0', 'EX', 'EX'],
      'Pronoun, attributive demonstrative', ['', '', '', 'PDAT'],
@ -181,7 +182,7 @@
      'Punctuation, semicolon', ['PUN', '.', '.', '', '', 'PN'],
      'Puncutation, colon or ellipsis', ['PUN', ':', ':'],
-      'Punctuationm, comma', ['PUN', ',', ',', '$,'],
+      'Punctuation, comma', ['PUN', ',', ',', '$,'],
      'Punctuation, dash', ['PUN', '-', '-'],
      'Punctuation, dollar sign', ['PUN', '', '$'],
      'Punctuation, left bracket', ['PUL', '(', '(', '$('],
@ -324,4 +325,4 @@
      ['SQ', 'Inverted yes/no question']
    ]
  } 
-}
+}
--- a/lib/treat/config/data/workers/extractors.rb
+++ b/lib/treat/config/data/workers/extractors.rb
@ -27,5 +27,13 @@
  tf_idf: {
    type: :annotator,
    targets: [:word]
  },
  similarity: {
    type: :computer,
    targets: [:entity]
  },
  distance: {
    type: :computer,
    targets: [:entity]
  }
 }
--- a/lib/treat/config/data/workers/lexicalizers.rb
+++ b/lib/treat/config/data/workers/lexicalizers.rb
@ -1,11 +1,12 @@
 {
  taggers: {
    type: :annotator,
-    targets: [:phrase, :token]
+    targets: [:group, :token],
    recursive: true
  },
  categorizers: {
    type: :annotator,
-    targets: [:phrase, :token],
+    targets: [:group, :token],
    recursive: true
  },
  sensers: {
@ -14,5 +15,5 @@
    preset_option: :nym,
    presets: [:synonyms, :antonyms, 
              :hyponyms, :hypernyms],
-  }        
+  }
 }
--- a/lib/treat/config/importable.rb
+++ b/lib/treat/config/importable.rb
@ -0,0 +1,31 @@
 # Mixin extended by Treat::Config; it exposes a single
 # entry point (import!) that configures every config
 # class and stores the combined result on the extender.
 module Treat::Config::Importable
  # Each configuration class is extended with
  # Configurable during the import.
  require_relative 'configurable'
  # Give the extending object a config accessor in
  # which the imported configuration is stored.
  def self.extended(base)
    base.singleton_class.send(:attr_accessor, :config)
  end
  # Configure every non-mixin constant found under
  # Treat::Config, collect each class' config under
  # the constant's downcased name, define a reader of
  # that name on Treat, and finally store the whole
  # collection (converted with to_struct) in self.config.
  def import!
    configurable = Treat::Config::Configurable
    imported = {}
    Treat::Config.constants.each do |const|
      key = const.to_s.downcase
      next if key.is_mixin?
      klass = Treat::Config.const_get(const)
      klass.class_eval { extend configurable }
      klass.configure!
      name = key.intern
      imported[name] = klass.config
      # The reader looks the value up when it is called,
      # so it reflects whatever Treat::Config.config holds then.
      Treat.define_singleton_method(name) do
        Treat::Config.config[name]
      end
    end
    self.config = imported.to_struct
  end
 end
--- a/lib/treat/config/paths.rb
+++ b/lib/treat/config/paths.rb
@ -1,9 +1,13 @@
 # Generates the following path config options:
 # Treat.paths.tmp, Treat.paths.bin, Treat.paths.lib,
 # Treat.paths.models, Treat.paths.files, Treat.paths.spec.
 class Treat::Config::Paths
  # Get the path configuration based on the 
  # directory structure loaded into Paths.
  # Note that this doesn't call super, as
  # there is no external config files to load.
  def self.configure!
    super
    root = File.dirname(File.expand_path(         # FIXME
    __FILE__)).split('/')[0..-4].join('/') + '/'
    self.config = Hash[
--- a/lib/treat/config/tags.rb
+++ b/lib/treat/config/tags.rb
@ -1,6 +1,8 @@
 # Handles all configuration related 
 # to understanding of part of speech
 # and phrasal tags.
 class Treat::Config::Tags
  # Load and align tags.
  # Generate a map of word and phrase tags 
  # to their syntactic category, keyed by 
  # tag set.
@ -16,21 +18,20 @@ class Treat::Config::Tags
    align_tags(phrase_tags, tag_sets)
    self.config[:aligned] = config
 end
-  
+ 
-  # * Helper methods for tag set config * #
+ # Helper methods for tag set config.
-  
+ # Align tag tags in the tag set 
-   # Align tag tags in the tag set 
+ def self.align_tags(tags, tag_sets)
-   def self.align_tags(tags, tag_sets)
+   wttc = {}
-     wttc = {}
+   tags.each_slice(2) do |desc, tags|
-     tags.each_slice(2) do |desc, tags|
+     category = desc.gsub(',', ' ,').
-       category = desc.gsub(',', ' ,').
+     split(' ')[0].downcase
-       split(' ')[0].downcase
+     tag_sets.each_with_index do |tag_set, i|
-       tag_sets.each_with_index do |tag_set, i|
+       next unless tags[i]
-         next unless tags[i]
+       wttc[tags[i]] ||= {}
-         wttc[tags[i]] ||= {}
+       wttc[tags[i]][tag_set] = category
-         wttc[tags[i]][tag_set] = category
+     end
-       end
+   end; return wttc
-     end; return wttc
+ end
   end
 end
--- a/lib/treat/core/dsl.rb
+++ b/lib/treat/core/dsl.rb
@ -1,36 +1,21 @@
 module Treat::Core::DSL
  # Include DSL on base.
  def self.included(base)
    self.sweeten_entities(base)
    self.sweeten_learning(base)
  end
  # Map all classes in Treat::Entities to
-  # a global builder function (Entity, etc.)
+  # a global builder function (entity, word,
-  def self.sweeten_entities(base, on = true)
+  # phrase, punctuation, symbol, list, etc.)
-    Treat.core.entities.list.each do |type|
+  def self.included(base)
-      next if type == :Symbol
+    def method_missing(sym,*args,&block)
-      kname = type.cc.intern
+      @@entities ||= Treat.core.entities.list
-      klass = Treat::Entities.const_get(kname)
+      @@learning ||= Treat.core.learning.list
-      Object.class_eval do
+      if @@entities.include?(sym)
-        define_method(kname) do |val, opts={}|
+        klass = Treat::Entities.const_get(sym.cc)
-          klass.build(val, opts)
+        return klass.build(*args)
-        end if on
+      elsif @@learning.include?(sym)
-        remove_method(name) if !on
+        klass = Treat::Learning.const_get(sym.cc)
-      end
+        return klass.new(*args)
-    end
+      else
-  end
+        super(sym,*args,&block)
-  
+        raise "Uncaught method ended up in Treat DSL."
  # Map all classes in the Learning module
  # to a global builder function (e.g. DataSet).
  def self.sweeten_learning(base, on = true)
    Treat::Learning.constants.each do |kname|
      Object.class_eval do
        define_method(kname) do |*args| 
          Treat::Learning.const_get(kname).new(*args)
        end if on
        remove_method(name) if !on
      end
    end
  end
--- a/lib/treat/core/installer.rb
+++ b/lib/treat/core/installer.rb
@ -1,11 +1,11 @@
 # A dependency manager for Treat language plugins.
 # Usage: Treat::Installer.install('language')
 module Treat::Core::Installer
-  
+
  require 'schiphol'
  # Address of the server with the files.
-  Server = 'www.louismullie.com'
+  Server = 's3.amazonaws.com/static-public-assets'
  # Filenames for the Stanford packages.
  StanfordPackages = {
@ -20,34 +20,34 @@ module Treat::Core::Installer
    :bin => File.absolute_path(Treat.paths.bin),
    :models => File.absolute_path(Treat.paths.models)
  }
-  
+
  # Install required dependencies and optional
  # dependencies for a specific language.
  def self.install(language = 'english')
-    
+
    # Require the Rubygem dependency installer.
    silence_warnings do
      require 'rubygems/dependency_installer'
    end
-    
+
    @@installer = Gem::DependencyInstaller.new
-    
+
    if language == 'travis'
      install_travis; return
    end
-    
+
    l = "#{language.to_s.capitalize} language"
    puts "\nTreat Installer, v. #{Treat::VERSION.to_s}\n\n"
-    
+
    begin
      title "Installing core dependencies."
      install_language_dependencies('agnostic')
-      
+
      title "Installing dependencies for the #{l}.\n"
      install_language_dependencies(language)
-      
+
      # If gem is installed only, download models.
      begin
        Gem::Specification.find_by_name('punkt-segmenter')
@ -73,7 +73,7 @@ module Treat::Core::Installer
    end
  end
-  
+
  # Minimal install for Travis CI.
  def self.install_travis
    install_language_dependencies(:agnostic)
@ -81,7 +81,7 @@ module Treat::Core::Installer
    download_stanford(:minimal)
    download_punkt_models(:english)
  end
-  
+
  def self.install_language_dependencies(language)
    dependencies = Treat.languages[language].dependencies
@ -92,31 +92,31 @@ module Treat::Core::Installer
  end
  def self.download_stanford(package = :minimal)
-    
+
    f = StanfordPackages[package]
    url = "http://#{Server}/treat/#{f}"
-    loc = Schiphol.download(url, 
+    loc = Schiphol.download(url,
      download_folder: Treat.paths.tmp
    )
    puts "- Unzipping package ..."
    dest = File.join(Treat.paths.tmp, 'stanford')
    unzip_stanford(loc, dest)
-    
+
    model_dir = File.join(Paths[:models], 'stanford')
    bin_dir = File.join(Paths[:bin], 'stanford')
    origin = File.join(Paths[:tmp], 'stanford')
-    
+
    # Mac hidden files fix.
    mac_remove = File.join(dest, '__MACOSX')
    if File.readable?(mac_remove)
      FileUtils.rm_rf(mac_remove)
    end
-    
+
    unless File.readable?(bin_dir)
      puts "- Creating directory bin/stanford ..."
      FileUtils.mkdir_p(bin_dir)
    end
-    
+
    unless File.readable?(model_dir)
      puts "- Creating directory models/stanford ..."
      FileUtils.mkdir_p(model_dir)
@ -127,18 +127,18 @@ module Treat::Core::Installer
    Dir.glob(File.join(origin, '*')) do |f|
      next if ['.', '..'].include?(f)
      if f.index('jar')
-        FileUtils.cp(f, File.join(Paths[:bin], 
+        FileUtils.cp(f, File.join(Paths[:bin],
        'stanford', File.basename(f)))
      elsif FileTest.directory?(f)
        FileUtils.cp_r(f, model_dir)
      end
    end
-    
+
    puts "- Cleaning up..."
    FileUtils.rm_rf(origin)
-    
+
    'Done.'
-    
+
  end
  def self.download_punkt_models(language)
@ -146,7 +146,7 @@ module Treat::Core::Installer
    f = "#{language}.yaml"
    dest = "#{Treat.paths.models}punkt/"
    url = "http://#{Server}/treat/punkt/#{f}"
-    loc = Schiphol.download(url, 
+    loc = Schiphol.download(url,
      download_folder: Treat.paths.tmp
    )
    unless File.readable?(dest)
@ -156,7 +156,7 @@ module Treat::Core::Installer
    puts "- Copying model file to models/punkt ..."
    FileUtils.cp(loc, File.join(Paths[:models], 'punkt', f))
-    
+
    puts "- Cleaning up..."
    FileUtils.rm_rf(Paths[:tmp] + Server)
@ -181,12 +181,11 @@ module Treat::Core::Installer
    begin
      puts "Installing #{dependency}...\n"
      @@installer.install(dependency)
-    rescue Exception => error
+    rescue Gem::InstallError => error
-      raise
+      puts "Warning: couldn't install " +
-      puts "Couldn't install gem '#{dependency}' " +
+      "gem '#{dependency}' (#{error.message})."
           "(#{error.message})."
    end
-    
+
  end
  # Unzip a file to the destination path.
@ -194,7 +193,7 @@ module Treat::Core::Installer
    require 'zip/zip'
    f_path = ''
-    
+
    Zip::ZipFile.open(file) do |zip_file|
      zip_file.each do |f|
        f_path = File.join(destination, f.name)
--- a/lib/treat/core/server.rb
+++ b/lib/treat/core/server.rb
@ -3,6 +3,7 @@ class Treat::Core::Server
  # Refer to http://rack.rubyforge.org/doc/classes/Rack/Server.html
  # for possible options to configure.
  #
  # The server is not finished: construction always raises
  # a RuntimeError first, so the statements that follow the
  # raise are currently never reached.
  def initialize(handler = 'thin', options = {})
    raise "Implementation not finished."
    require 'json'
    require 'rack'
    @handler = handler.capitalize
    @options = options
  end
--- a/lib/treat/entities/entities.rb
+++ b/lib/treat/entities/entities.rb
@ -4,6 +4,7 @@ module Treat::Entities
  # Represents a collection.
  class Collection < Entity; end
  # Represents a document.
  class Document < Entity; end
@ -18,6 +19,9 @@ module Treat::Entities
  # Represents a block of text 
  class Block < Section; end
  # Represents a list.
  class List < Section; end
  # * Zones and related classes * #
  # Represents a zone of text.
@ -31,9 +35,6 @@ module Treat::Entities
  # of sentences and/or phrases).
  class Paragraph < Zone; end
  # Represents a list.
  class List < Zone; end
  # * Groups and related classes * #
  # Represents a group of tokens.
--- a/lib/treat/entities/entity.rb
+++ b/lib/treat/entities/entity.rb
@ -22,7 +22,9 @@ module Treat::Entities
    attr_accessor :type
    # Autoload all the classes in /abilities.
-    include Treat::Autoload
+    path = File.expand_path(__FILE__)
    patt = File.dirname(path) + '/entity/*.rb'
    Dir.glob(patt).each { |f| require f }
    # Implements support for #register, #registry.
    include Registrable
@ -82,8 +84,11 @@ module Treat::Entities
    # 
    # Takes in a single entity or an array of 
    # entities. Returns the first child supplied.
-    # @see Treat::Registrable
+    # A string or numeric is first converted with #to_entity.
    def <<(entities, clear_parent = true)
      entities = (entities.is_a?(::String) ||
      entities.is_a?(::Numeric)) ? 
      entities.to_entity : entities
      entities = entities.is_a?(::Array) ?
      entities : [entities]
      # Register each entity in this node.
@ -121,7 +126,7 @@ module Treat::Entities
    # requested method does not exist. Also
    # provides suggestions for misspellings.
    def invalid_call(sym)
-      msg = Treat::Workers::Category.lookup(sym) ?
+      msg = Treat::Workers.lookup(sym) ?
      "Method #{sym} can't be called on a #{type}." :
      "Method #{sym} is not defined by Treat." +
      Treat::Helpers::Help.did_you_mean?(
--- a/lib/treat/entities/entity/applicable.rb
+++ b/lib/treat/entities/entity/applicable.rb
@ -57,7 +57,7 @@ module Treat::Entities::Entity::Applicable
  # Get the group of a task.
  def get_group(task)
-    g = Treat::Workers::Category.lookup(task)
+    g = Treat::Workers.lookup(task)
    unless g
      raise Treat::Exception,
      "Task #{task} does not exist."
--- a/lib/treat/entities/entity/buildable.rb
+++ b/lib/treat/entities/entity/buildable.rb
@ -15,7 +15,21 @@ module Treat::Entities::Entity::Buildable
  PunctRegexp = /^[[:punct:]\$]+$/
  UriRegexp = /^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix
  EmailRegexp = /.+\@.+\..+/
-  Enclitics = %w['ll 'm 're 's 't 've]
+  Enclitics = [
              # EXAMPLE:
    "'d",     # I'd         => I would
    "'ll",    # I'll        => I will
    "'m",     # I'm         => I am
    "'re",    # We're       => We are
    "'s",     # There's     => There is
              # Let's       => Let us
    "'t",     # 'Twas       => Archaic ('Twas the night)
    "'ve",    # They've     => They have
    "n't"     # Can't       => Can not
  ]
  # Accepted formats of serialized files
  AcceptedFormats = ['.xml', '.yml', '.yaml', '.mongo']
  # Reserved folder names
  Reserved = ['.index']
@ -23,23 +37,38 @@ module Treat::Entities::Entity::Buildable
  # Build an entity from anything (can be
  # a string, numeric, folder, or file name
  # representing a raw or serialized file).
-  def build(file_or_value, options = {})
+  def build(*args)
    # This probably needs some doc.
    if args.size == 0
      file_or_value = ''
    elsif args[0].is_a?(Hash)
      file_or_value = args[0]
    elsif args.size == 1
      if args[0].is_a?(Treat::Entities::Entity)
        args[0] = [args[0]]
      end
      file_or_value = args[0]
    else
      file_or_value = args
    end
    fv = file_or_value.to_s
-    if file_or_value.is_a?(Hash)
+    if fv == ''; self.new
    elsif file_or_value.is_a?(Array)
      from_array(file_or_value)
    elsif file_or_value.is_a?(Hash)
      from_db(file_or_value)
-    elsif self == Treat::Entities::Document ||
+    elsif self == Treat::Entities::Document || (is_serialized_file?(fv))
      (fv.index('yml') || fv.index('yaml') ||
      fv.index('xml') || fv.index('mongo'))
      if fv =~ UriRegexp
-        from_url(fv, options)
+        from_url(fv)
      else
-        from_file(fv, options)
+        from_file(fv)
      end
    elsif self == Treat::Entities::Collection
      if FileTest.directory?(fv)
-        from_folder(fv, options)
+        from_folder(fv)
      else
        create_collection(fv)
      end
@ -78,8 +107,19 @@ module Treat::Entities::Entity::Buildable
    e
  end
  # Build a new instance of the receiving class and
  # append every element of the array to it with <<.
  # Elements that are not already entities are first
  # converted with #to_entity. Returns the new instance.
  def from_array(array)
    array.each_with_object(self.new) do |element, entity|
      child = element.is_a?(Treat::Entities::Entity) ?
      element : element.to_entity
      entity << child
    end
  end
  # Build a document from an URL.
-  def from_url(url, options)
+  def from_url(url)
    unless self ==
      Treat::Entities::Document
      raise Treat::Exception,
@ -88,8 +128,12 @@ module Treat::Entities::Entity::Buildable
    end
    begin
      folder = Treat.paths.files
      if folder[-1] == '/'
        folder = folder[0..-2]
      end
      f = Schiphol.download(url,
-      download_folder: Treat.paths.files,
+      download_folder: folder,
      show_progress: !Treat.core.verbosity.silence,
      rectify_extensions: true,
      max_tries: 3)
@ -97,10 +141,8 @@ module Treat::Entities::Entity::Buildable
      raise Treat::Exception,
      "Couldn't download file at #{url}."
    end
    options[:default_to] ||= 'html'
-    e = from_file(f, options)
+    e = from_file(f,'html')
    e.set :url, url.to_s
    e
@ -123,7 +165,7 @@ module Treat::Entities::Entity::Buildable
  # Build an entity from a folder with documents.
  # Folders will be searched recursively.
-  def from_folder(folder, options)
+  def from_folder(folder)
    return if Reserved.include?(folder)
@ -148,49 +190,44 @@ module Treat::Entities::Entity::Buildable
    c = Treat::Entities::Collection.new(folder)
    folder += '/' unless folder[-1] == '/'
-    
+
    if !FileTest.directory?(folder)
      FileUtils.mkdir(folder)
    end
-    
+
    c.set :folder, folder
    i = folder + '/.index'
    c.set :index, i if FileTest.directory?(i)
-    
+
    Dir[folder + '*'].each do |f|
      if FileTest.directory?(f)
        c2 = Treat::Entities::Collection.
-        from_folder(f, options)
+        from_folder(f)
        c.<<(c2, false) if c2
      else
        c.<<(Treat::Entities::Document.
-        from_file(f, options), false)
+        from_file(f), false)
      end
    end
-    
+
-    c
+    return c
  end
  # Build a document from a raw or serialized file.
-  def from_file(file, options)
+  def from_file(file,def_fmt=nil)
-    if file.index('yml') ||
+    if is_serialized_file?(file)
-      file.index('yaml') ||
+      from_serialized_file(file)
      file.index('xml') ||
      file.index('mongo')
      from_serialized_file(file, options)
    else
-      fmt = Treat::Workers::Formatters::Readers::Autoselect.
+      fmt = Treat::Workers::Formatters::Readers::Autoselect.detect_format(file,def_fmt)
-      detect_format(file, options[:default_to])
+      from_raw_file(file, fmt)
      options[:_format] = fmt
      from_raw_file(file, options)
    end
  end
  # Build a document from a raw file.
-  def from_raw_file(file, options)
+  def from_raw_file(file, def_fmt='txt')
    unless self ==
      Treat::Entities::Document
@ -204,7 +241,7 @@ module Treat::Entities::Entity::Buildable
      "Path '#{file}' does not "+
      "point to a readable file."
    end
-
+    options =  {default_format: def_fmt}
    d = Treat::Entities::Document.new
    d.set :file, file
    d.read(:autoselect, options)
@ -212,34 +249,32 @@ module Treat::Entities::Entity::Buildable
  end
  # Build an entity from a serialized file.
-  def from_serialized_file(file, options)
+  def from_serialized_file(file)
-    if file.index('mongo')
+    unless File.readable?(file)
-      options[:id] = file.scan(              # Consolidate this
+      raise Treat::Exception,
-      /([0-9]+)\.mongo/).first.first
+      "Path '#{file}' does not "+
-      from_db(:mongo, options)
+      "point to a readable file."
    else
      unless File.readable?(file)
        raise Treat::Exception,
        "Path '#{file}' does not "+
        "point to a readable file."
      end
      doc = Treat::Entities::Document.new
      doc.set :file, file
      format = nil
      if file.index('yml') || file.index('yaml')
        format = :yaml
      elsif file.index('xml')
        f = :xml
      else
        raise Treat::Exception,
        "Unreadable serialized format for #{file}."
      end
      doc.unserialize(format, options)
      doc.children[0].set_as_root!              # Fix this
      doc.children[0]
    end
    doc = Treat::Entities::Document.new
    doc.set :file, file
    format = nil
    if File.extname(file) == '.yml' ||
       File.extname(file) == '.yaml'
      format = :yaml
    elsif File.extname(file) == '.xml'
      format = :xml
    else
      raise Treat::Exception,
      "Unreadable serialized format for #{file}."
    end
    doc.unserialize(format)
    doc.children[0].set_as_root!              # Fix this
    doc.children[0]
  end
  # True only when the path's extension is listed in
  # AcceptedFormats and the path points to a regular file.
  def is_serialized_file?(path_to_check)
    extension = File.extname(path_to_check)
    return false unless AcceptedFormats.include?(extension)
    File.file?(path_to_check)
  end
  def from_db(hash)
@ -258,9 +293,23 @@ module Treat::Entities::Entity::Buildable
  # Build any kind of entity from a string.
  def anything_from_string(string)
    case self.mn.downcase.intern
-    when :document, :collection
+    when :document
      folder = Treat.paths.files
      if folder[-1] == '/'
        folder = folder[0..-2]
      end
      now = Time.now.to_f
      doc_file = folder+ "/#{now}.txt"
      string.force_encoding('UTF-8')
      File.open(doc_file, 'w') do |f|
        f.puts string
      end
      from_raw_file(doc_file)
    when :collection
      raise Treat::Exception,
-      "Cannot create a document or " +
+      "Cannot create a " +
      "collection from a string " +
      "(need a readable file/folder)."
    when :phrase
@ -287,6 +336,7 @@ module Treat::Entities::Entity::Buildable
  end
  # Return the string transcoded to UTF-8; characters
  # that are undefined in UTF-8 are replaced instead of
  # raising. This should be improved on.
  def check_encoding(string)
    target = Encoding::UTF_8
    string.encode(target, :undef => :replace) # Fix
  end
@ -346,7 +396,7 @@ module Treat::Entities::Entity::Buildable
    end
  end
-  
+
  def create_collection(fv)
    FileUtils.mkdir(fv)
    Treat::Entities::Collection.new(fv)
--- a/lib/treat/entities/entity/checkable.rb
+++ b/lib/treat/entities/entity/checkable.rb
@ -11,8 +11,8 @@ module Treat::Entities::Entity::Checkable
    return @features[feature] if has?(feature)
    return send(feature) if do_it
    task = caller_method(2) # This is dangerous !
-    g1 = Treat::Workers::Category.lookup(task)
+    g1 = Treat::Workers.lookup(task)
-    g2 = Treat::Workers::Category.lookup(feature)
+    g2 = Treat::Workers.lookup(feature)
    raise Treat::Exception,
    "#{g1.type.to_s.capitalize} " +
--- a/lib/treat/entities/entity/countable.rb
+++ b/lib/treat/entities/entity/countable.rb
@ -41,6 +41,7 @@ module Treat::Entities::Entity::Countable
  # Returns the frequency of the given value
  # in this entity.
  def frequency_of(value)
    value = value.downcase
    if is_a?(Treat::Entities::Token)
      raise Treat::Exception,
      "Cannot get the frequency " +
--- a/lib/treat/entities/entity/debuggable.rb
+++ b/lib/treat/entities/entity/debuggable.rb
@ -3,67 +3,64 @@
 # printed by the #print_debug function.
 module Treat::Entities::Entity::Debuggable
-  @@prev = nil
+  # Previous state and counter.
-  @@i = 0
+  @@prev, @@i = nil, 0
  # Explains what Treat is currently doing.
  # Fixme: last call will never get shown.
  def print_debug(entity, task, worker, group, options)
-
+    # Get a list of the worker's targets.
-    targs = group.targets.map do |target|
+    targets = group.targets.map(&:to_s)
      target.to_s
    end
-    if targs.size == 1
+    # List the worker's targets as either
-      t = targs[0]
+    # a single target or an and/or form
-    else
+    # (since it would be too costly to
-      t = targs[0..-2].join(', ') +
+    # actually determine what target types
-      ' and/or ' + targs[-1]
+    # were processed at runtime for each call).
-    end
+    t = targets.size == 1 ? targets[0] : targets[
    0..-2].join(', ') + ' and/or ' + targets[-1]
    # Add genitive for annotations (sing./plural)
    genitive = targets.size > 1 ? 'their' : 'its'
    # Set up an empty string and humanize task name.
    doing, human_task = '', task.to_s.gsub('_', ' ')
-    genitive = targs.size > 1 ?
+    # Base is "{task}-ed {a(n)|N} {target(s)}"
-    'their' : 'its'
+    if [:transformer, :computer].include?(group.type)
    doing = ''
    human_task = task.to_s.gsub('_', ' ')
    if group.type == :transformer ||
      group.type == :computer
      tt = human_task
      tt = tt[0..-2] if tt[-1] == 'e'
      ed = tt[-1] == 'd' ? '' : 'ed'
      doing = "#{tt.capitalize}#{ed} #{t}"
-      
+    # Base is "Annotated {a(n)|N} {target(s)}"
    elsif group.type == :annotator
      if group.preset_option
        opt = options[group.preset_option]
        form = opt.to_s.gsub('_', ' ')
        human_task[-1] = ''
        human_task = form + ' ' + human_task
      end
      doing = "Annotated #{t} with " +
      "#{genitive} #{human_task}"
    end
    # Form is '{base} in format {worker}'.
    if group.to_s.index('Formatters')
-      curr = doing +
+      curr = doing + ' in format ' + worker.to_s
-      ' in format ' +
+    # Form is '{base} using {worker}'.
      worker.to_s
    else
-      curr = doing +
+      curr = doing + ' using ' + worker.to_s.gsub('_', ' ')
      ' using ' +
      worker.to_s.gsub('_', ' ')
    end
    # Remove any double pluralization that may happen.
    curr.gsub!('ss', 's') unless curr.index('class')
    curr += '.'
-    if curr == @@prev
+    # Accumulate repeated tasks.
-      @@i += 1
+    @@i += 1 if curr == @@prev
-    else
+    
    # Change tasks, so output.
    if curr != @@prev && @@prev
      # Pluralize entity names if necessary.
      if @@i > 1
        Treat.core.entities.list.each do |e|
          @@prev.gsub!(e.to_s, e.to_s + 's')
@ -71,9 +68,15 @@ module Treat::Entities::Entity::Debuggable
        @@prev.gsub!('its', 'their')
        @@prev = @@prev.split(' ').
        insert(1, @@i.to_s).join(' ')
      # Add determiner if singular.
      else
        @@prev = @@prev.split(' ').
        insert(1, 'a').join(' ')
      end
      # Reset counter.
      @@i = 0
-      puts @@prev     # Last call doesn't get shown.
+      # Write to stdout.
      puts @@prev + '.'
    end
    @@prev = curr
--- a/lib/treat/entities/entity/delegatable.rb
+++ b/lib/treat/entities/entity/delegatable.rb
@ -88,7 +88,6 @@ module Treat::Entities::Entity::Delegatable
  # Get the default worker for that language
  # inside the given group.
  def find_worker_for_language(language, group)
    lang = Treat.languages[language]
    cat = group.to_s.split('::')[2].downcase.intern
    group = group.mn.ucc.intern
@ -96,31 +95,25 @@ module Treat::Entities::Entity::Delegatable
      raise Treat::Exception,
      "No configuration file loaded for language #{language}."
    end
    workers = lang.workers
    if !workers.respond_to?(cat) ||
       !workers[cat].respond_to?(group)
        workers = Treat.languages.agnostic.workers
    end
    if !workers.respond_to?(cat) || 
       !workers[cat].respond_to?(group)
      raise Treat::Exception,
      "No #{group} is/are available for the " +
      "#{language.to_s.capitalize} language."
    end
    workers[cat][group].first
  end
  # Return an error message and suggest possible typos.
-  def worker_not_found(klass, group)
+  def worker_not_found(worker, group)
-    "Algorithm '#{klass.mn.ucc}' couldn't be "+
+    "Worker with name '#{worker}' couldn't be "+
    "found in group #{group}." + Treat::Helpers::Help.
-    did_you_mean?(group.list.map { |c| c.ucc }, klass.ucc)
+    did_you_mean?(group.list.map { |c| c.ucc }, worker)
  end
 end
--- a/lib/treat/entities/entity/iterable.rb
+++ b/lib/treat/entities/entity/iterable.rb
@ -105,18 +105,6 @@ module Treat::Entities::Entity::Iterable
    end
    i
  end
  # Return the first element in the array, warning if not
  # the only one in the array. Used for magic methods: e.g.,
  # the magic method "word" if called on a sentence with many 
  # words, Treat will return the first word, but warn the user.
  def first_but_warn(array, type)
    # Only complain when the caller asked for one
    # element but several are available.
    if array.size > 1
      notice = "Warning: requested one #{type}, but"
      notice << " there are many #{type}s in this entity."
      warn notice
    end
    # Always hand back the first element (nil if empty).
    array[0]
  end
 end
--- a/lib/treat/entities/entity/magical.rb
+++ b/lib/treat/entities/entity/magical.rb
@ -78,5 +78,16 @@ module Treat::Entities::Entity::Magical
  end
  # Return the first element in the array, warning if not
  # the only one in the array. Used for magic methods: e.g.,
  # the magic method "word" if called on a sentence with many 
  # words, Treat will return the first word, but warn the user.
  def first_but_warn(array, type)
    # Only complain when the caller asked for one
    # element but several are available.
    if array.size > 1
      notice = "Warning: requested one #{type}, but"
      notice << " there are many #{type}s in this entity."
      warn notice
    end
    # Always hand back the first element (nil if empty).
    array[0]
  end
 end
--- a/lib/treat/entities/entity/stringable.rb
+++ b/lib/treat/entities/entity/stringable.rb
@ -6,6 +6,12 @@ module Treat::Entities::Entity::Stringable
  # Returns the entity's true string value.
  def to_string;  @value.dup; end
  # Returns an array of the childrens' string
  # values, found by calling #to_s on them.
  def to_a; @children.map { |c| c.to_s }; end
  alias :to_ary :to_a
  # Returns the entity's string value by
  # imploding the value of all terminal
  # entities in the subtree of that entity.
@ -52,16 +58,14 @@ module Treat::Entities::Entity::Stringable
   end
  # Helper method to implode the string value of the subtree.
-  def implode
+  def implode(value = "")
    return @value.dup if !has_children?
    value = ''
    each do |child|
      if child.is_a?(Treat::Entities::Section)
-        value += "\n\n"
+        value << "\n\n"
      end
      if child.is_a?(Treat::Entities::Token) || child.value != ''
@ -69,14 +73,14 @@ module Treat::Entities::Entity::Stringable
          child.is_a?(Treat::Entities::Enclitic)
          value.strip!
        end
-        value += child.to_s + ' '
+        value << child.to_s + ' '
      else
-        value += child.implode
+        child.implode(value)
      end
      if child.is_a?(Treat::Entities::Title) ||
        child.is_a?(Treat::Entities::Paragraph)
-        value += "\n\n"
+        value << "\n\n"
      end
    end
--- a/lib/treat/helpers/hash.rb
+++ b/lib/treat/helpers/hash.rb
@ -1,18 +1,29 @@
 # Helper methods to manipulate hashes.
 class Treat::Helpers::Hash
-  # Allow getting the caller method in any context.
+  # Mixin to allow conversion of hashes to
-  Hash.class_eval do
+  # nested structs with the keys as attributes.
  module ToStruct
    # Converts a hash to nested structs.
-    def self.hash_to_struct(hash)
+    def to_struct
-      return hash if hash.keys.
+      hash = self
-      select { |k| !k.is_a?(Symbol) }.size > 0
+      symbols = hash.keys.select { |k| 
-      struct = Struct.new(*hash.keys).new(*hash.values)
+      !k.is_a?(Symbol) }.size
      return hash if symbols > 0
      klass = Struct.new(*hash.keys)
      struct = klass.new(*hash.values)
      hash.each do |key, value|
        if value.is_a?(Hash)
-          struct[key] = self.hash_to_struct(value)
+          v = value.to_struct
          struct[key] = v
        end
      end; return struct
    end
  end
  # Include the mixins on the core Hash class.
  Hash.class_eval do
    include Treat::Helpers::Hash::ToStruct
  end
 end
--- a/lib/treat/helpers/help.rb
+++ b/lib/treat/helpers/help.rb
@ -32,30 +32,4 @@ class Treat::Helpers::Help
    msg
  end
  # Return the Levenshtein distance between
  # two strings taking into account the costs
  # of insertion, deletion, and substitution.
  # Used by did_you_mean? to detect typos.
  def self.levenshtein(first, other, ins=1, del=1, sub=1)
    # Either argument missing: no distance can be computed.
    return nil if first.nil? || other.nil?
    # dm[i][j] holds the cost of turning the first i
    # characters of other into the first j characters
    # of first. Row 0 is built from insertions only.
    dm = [(0..first.length).map { |j| j * ins }]
    (1..other.length).each do |i|
      # Column 0 is built from deletions only.
      dm[i] = [i * del]
      (1..first.length).each do |j|
        # Compare the j-th character of first with the
        # i-th character of other (j indexes first).
        cost = first[j-1] == other[i-1] ? 0 : sub
        dm[i][j] = [
          dm[i-1][j-1] + cost,
          dm[i][j-1] + ins,
          dm[i-1][j] + del
        ].min
      end
    end
    dm[other.length][first.length]
  end
 end
--- a/lib/treat/helpers/object.rb
+++ b/lib/treat/helpers/object.rb
@ -4,46 +4,40 @@ class Treat::Helpers::Object
  # Allow introspection onto what method called
  # another one at runtime (useful for debugging).
  module CallerMethod
-    
+    # Pattern to match method from trace.
    CMPattern = /^(.+?):(\d+)(?::in `(.*)')?/
    # Return the name of the method that 
    # called the method that calls this method.
    def caller_method(n = 3)
      at = caller(n).first
-      /^(.+?):(\d+)(?::in `(.*)')?/ =~ at
+      CMPattern =~ at
      Regexp.last_match[3].
      gsub('block in ', '').intern
    end
  end
  # Retrieve the last name of a class/module
  # (i.e. the part after the last "::").
  module ModuleName
    # Last '::'-separated segment of the receiver's
    # string form (nil when that string is empty).
    def module_name
      segments = to_s.split('::')
      segments.last
    end
    # Short-hand for module_name.
    alias_method :mn, :module_name
  end
  module Verbosity
    # Runs a block of code without warnings.
    def silence_warnings(&block)
-      warn_level = $VERBOSE
+      warn_level = $VERBOSE; $VERBOSE = nil
-      $VERBOSE = nil
+      result = block.call; $VERBOSE = warn_level
      result = block.call
      $VERBOSE = warn_level
      result
    end
    # Runs a block of code while blocking stdout.
    def silence_stdout(log = '/dev/null')
      unless Treat.core.verbosity.silence
        yield; return
      end
-      old = $stdout.dup
+      file, old, ret = File.new(log, 'w'), 
-      $stdout.reopen(File.new(log, 'w'))
+      $stdout.dup, nil; $stdout.reopen(file)
-      yield
+      ret = yield; $stdout = old; return ret
      $stdout = old
    end
  end
--- a/lib/treat/helpers/string.rb
+++ b/lib/treat/helpers/string.rb
@ -54,7 +54,7 @@ class Treat::Helpers::String
      if @@cc_cache[o_phrase]
        return @@cc_cache[o_phrase] 
      end
-      if Treat.core.acronyms.include?(phrase)
+      if Treat.core.acronyms.include?(phrase.downcase)
        phrase = phrase.upcase
      else
        phrase.gsub!(Regex) { |a| a.upcase }
@ -99,12 +99,19 @@ class Treat::Helpers::String
  end
  # Determines whether module is 
  # an "-able" mixin kind of thing.
  module IsMixin
    # True when the receiver's string form ends in 'able'.
    def is_mixin?
      to_s.end_with?('able')
    end
  end
  # Graft the helpers onto the string module.
  String.class_eval do
    include Treat::Helpers::String::CamelCaseable
    include Treat::Helpers::String::UnCamelCaseable
    include Treat::Helpers::String::Escapable
    include Treat::Helpers::String::Unescapable
    include Treat::Helpers::String::IsMixin
  end
  # Graft camel casing onto symbols.
--- a/lib/treat/helpers/verbosity.rb
+++ b/lib/treat/helpers/verbosity.rb
@ -1,7 +0,0 @@
 # Handles the verbosity for external
 # programs (gems, binaries, etc.)
 module Treat::Helpers::Verbosity
 end
--- a/lib/treat/learning/problem.rb
+++ b/lib/treat/learning/problem.rb
@ -63,7 +63,7 @@ class Treat::Learning::Problem
  # all of the features.
  def export_features(e, include_answer = true)
    features = export(e, @features)
-    return features unless include_answer
+    return features if !include_answer
    features << (e.has?(@question.name) ? 
    e.get(@question.name) : @question.default)
    features
@ -80,9 +80,11 @@ class Treat::Learning::Problem
  def export(entity, exports)
    unless @question.target == entity.type
      targ, type = @question.target, entity.type
      raise Treat::Exception, 
-      "This classification problem targets #{@question.target}s, " +
+      "This classification problem targets " +
-      "but a(n) #{entity.type} was passed to export instead."
+      "#{targ}s, but a(n) #{type} " +
      "was passed to export instead."
    end
    ret = []
    exports.each do |export|
@ -116,9 +118,8 @@ class Treat::Learning::Problem
    question = Treat::Learning::Question.new(
      hash['question']['name'], 
      hash['question']['target'],
      hash['question']['type'],
      hash['question']['default'],
-      hash['question']['labels']
+      hash['question']['type']
    )
    features = []
    hash['features'].each do |feature|
--- a/lib/treat/learning/question.rb
+++ b/lib/treat/learning/question.rb
@ -16,12 +16,9 @@ class Treat::Learning::Question
  attr_reader :type
  # Default for the answer to the question.
  attr_reader :default
  # A list of possible answers to the question.
  attr_reader :labels
  # Initialize the question.
-  def initialize(name, target, 
+  def initialize(name, target, default = nil, type = :continuous)
    type = :continuous, default = nil, labels = [])
    unless name.is_a?(Symbol)
      raise Treat::Exception, 
      "Question name should be a symbol."
@ -35,8 +32,8 @@ class Treat::Learning::Question
      raise Treat::Exception, "Type should be " +
      "continuous or discrete."
    end
-    @name, @target, @type, @default, @labels = 
+    @name, @target, @type, @default = 
-     name,  target,  type,  default,  labels
+     name,  target,  type,  default
  end
  # Custom comparison operator for questions.
@ -44,8 +41,7 @@ class Treat::Learning::Question
    @name == question.name &&
    @type == question.type &&
    @target == question.target &&
-    @default == question.default &&
+    @default == question.default
    @labels = question.labels
  end
 end
--- a/lib/treat/loaders/bind_it.rb
+++ b/lib/treat/loaders/bind_it.rb
@ -0,0 +1,52 @@
 # Shared loader for wrapper classes that respond to
 # jar_path=, model_path=, use, log_file= and bind;
 # subclasses pass in the wrapper class to configure.
 class Treat::Loaders::BindIt
  # Keep track of which wrapper classes are loaded.
  @@loaded = {}
  # Configure and bind the wrapper class, at most
  # once per class.
  #
  # - klass: the wrapper class/module to configure.
  # - name: key into Treat.libraries, also used as the
  #   sub-folder name under Treat.paths.bin and
  #   Treat.paths.models when no path is configured.
  # - language: language handed to klass.use; defaults
  #   to Treat.core.language.default.
  #
  # Raises Treat::Exception if the JAR folder or the
  # model folder is not a directory.
  def self.load(klass, name, language = nil)
    return if @@loaded[klass]
    language ||= Treat.core.language.default
    jar_path   = Treat.libraries[name].jar_path ||
                 Treat.paths.bin + "#{name}/"
    model_path = Treat.libraries[name].model_path ||
                 Treat.paths.models + "#{name}/"
    unless File.directory?(jar_path)
      raise Treat::Exception, "Looking for #{klass} " +
      "library JAR files in #{jar_path}, but it is " +
      "not a directory. Please set the config option " +
      "Treat.libraries.#{name}.jar_path to a folder " +
      "containing the appropriate JAR files."
    end
    unless File.directory?(model_path)
      raise Treat::Exception, "Looking for #{klass} " +
      "library model files in #{model_path}, but it " +
      "is not a directory. Please set the config option " +
      "Treat.libraries.#{name}.model_path to a folder " +
      "containing the appropriate model files."
    end
    klass.jar_path = jar_path
    klass.model_path = model_path
    klass.use language
    if Treat.core.verbosity.silence
      # Send the library's log to the null device
      # ('NUL' on Windows, '/dev/null' elsewhere).
      klass.log_file =
      Gem.win_platform? ? 'NUL' : '/dev/null'
    end
    klass.bind
    @@loaded[klass] = true
  end
 end
--- a/lib/treat/loaders/linguistics.rb
+++ b/lib/treat/loaders/linguistics.rb
@ -10,14 +10,13 @@ class Treat::Loaders::Linguistics
  # to the supplied language; raises an exception
  # if there is no such language class registered.
  def self.load(language)
-    silence_warnings do
+    code = language.to_s[0..1].intern # FIX
-      # Linguistics throws warnings; silence them.
+    unless @@languages[language]
-      silence_warnings { require 'linguistics' }
+      require 'linguistics'
-      code = language.to_s[0..1].upcase
+      Linguistics.use(code)
-      @@languages[language] ||= 
+      @@languages[language] = true
      ::Linguistics.const_get(code)
    end
-    return @@languages[language]
+    code
  rescue RuntimeError
    raise Treat::Exception,
    "Ruby Linguistics does not have a module " +
--- a/lib/treat/loaders/open_nlp.rb
+++ b/lib/treat/loaders/open_nlp.rb
@ -0,0 +1,11 @@
 require_relative 'bind_it'
 # A helper class to load the OpenNLP package.
 class Treat::Loaders::OpenNLP < Treat::Loaders::BindIt
  # Require the 'open-nlp' library, then delegate to
  # the parent loader with the OpenNLP wrapper and
  # the :open_nlp configuration key.
  #
  # - language: passed through to the parent loader
  #   (nil lets the parent pick its default).
  def self.load(language = nil)
    require 'open-nlp'
    super(OpenNLP, :open_nlp, language)
  end
 end
--- a/lib/treat/loaders/stanford.rb
+++ b/lib/treat/loaders/stanford.rb
@ -1,24 +1,20 @@
-# A helper class to load the CoreNLP package.
+require_relative 'bind_it'
 class Treat::Loaders::Stanford
  # Keep track of whether its loaded or not.
  @@loaded = false
-  # Load CoreNLP package for a given language.
+# A helper class to load the CoreNLP package.
-  def self.load(language = nil)
+class Treat::Loaders::Stanford < Treat::Loaders::BindIt
    return if @@loaded
    require 'stanford-core-nlp'
    language ||= Treat.core.language.default
    StanfordCoreNLP.jar_path = 
    Treat.libraries.stanford.jar_path || 
    Treat.paths.bin + 'stanford/'
    StanfordCoreNLP.model_path = 
    Treat.libraries.stanford.model_path || 
    Treat.paths.models + 'stanford/'
    StanfordCoreNLP.use(language)
    StanfordCoreNLP.log_file = '/dev/null' if 
    Treat.core.verbosity.silence
    StanfordCoreNLP.bind; @@loaded = true
  end
-end
+  def self.load(language = nil)
    require 'stanford-core-nlp'
    super(StanfordCoreNLP, :stanford, language)
  end
  # Build the full path to the model file registered
  # under the given name for the given language.
  def self.find_model(name, language)
    lang_key = language.intern
    file_name = StanfordCoreNLP::Config::Models[name][lang_key]
    folder = StanfordCoreNLP::Config::ModelFolders[name]
    # Prefer the configured model path; otherwise use
    # the 'stanford' folder under the models path.
    base = Treat.libraries.stanford.model_path
    base ||= File.join(Treat.paths.models, 'stanford')
    File.join(base, folder, file_name)
  end
 end
--- a/lib/treat/modules.rb
+++ b/lib/treat/modules.rb
@ -1,13 +1,13 @@
 module Treat
  # Contains common utility/helper functions.
  module Helpers; include Autoload; end
  # Contains all the configuration options.
  module Config; include Autoload; end
-  # Load all the configuration options.
+  # Import all the configuration options.
-  Treat::Config.configure!
+  Treat::Config.import!
  # Contains common utility/helper functions.
  module Helpers; include Autoload; end
  # Contains classes to load external libraries.
  module Loaders; include Autoload; end
@ -20,7 +20,10 @@ module Treat
  # Contains all the worker categories.
  module Workers; include Autoload; end
-
+  
  # Make all the worker categories.
  Treat::Workers.categorize!
  # Installs builders on core Ruby objects.
  module Proxies; include Autoload; end
--- a/lib/treat/proxies/array.rb
+++ b/lib/treat/proxies/array.rb
@ -0,0 +1,27 @@
 module Treat::Proxies
  module Array
    # Include base proxy functionality.
    include Treat::Proxies::Proxy
    # Send :do, :apply and any method known to
    # Treat::Workers.lookup to every element, turning
    # non-entities into entities first. Anything
    # else is passed up to the default handler.
    def method_missing(sym, *args, &block)
      unless [:do, :apply].include?(sym) ||
        Treat::Workers.lookup(sym)
        return super(sym, *args, &block)
      end
      map do |el|
        if el.is_a?(Treat::Entities::Entity)
          target = el
        else
          target = el.to_entity
        end
        target.send(sym, *args)
      end
    end
  end
  # Include Treat methods on arrays.
  ::Array.class_eval do
    include Treat::Proxies::Array
  end
 end
--- a/lib/treat/proxies/language.rb
+++ b/lib/treat/proxies/language.rb
@ -21,17 +21,18 @@ module Treat::Proxies
      !Treat.core.language.detect
      if is_a?(Treat::Entities::Symbol) ||
-        is_a?(Treat::Entities::Number)
+        is_a?(Treat::Entities::Number) ||
        is_a?(Treat::Entities::Punctuation)
        return Treat.core.language.default
      end
      dlvl = Treat.core.language.detect_at
      dklass = Treat::Entities.const_get(dlvl.cc)
-      if self.class.compare_with(
+      if self.class.compare_with(dklass) < 1
        dklass) < 1 && has_parent?
        anc = ancestor_with_type(dlvl)
        return anc.language if anc
        return self.parent.language if has_parent?
      end
      extractor ||= Treat.workers.
--- a/lib/treat/proxies/proxy.rb
+++ b/lib/treat/proxies/proxy.rb
@ -10,15 +10,16 @@ module Treat::Proxies
    # object and send the method call to the entity.
    def method_missing(sym, *args, &block)
      if [:do, :apply].include?(sym) || 
-        Treat::Workers::Category.lookup(sym)
+        Treat::Workers.lookup(sym)
-          to_entity.send(sym, *args)
+        to_entity.send(sym, *args)
      else
        super(sym, *args, &block)
      end
    end
    # Create an unknown type of entity by default.
    def to_entity(builder = nil)
-      Treat::Entities::Unknown(self.to_s)
+      Treat::Entities::Unknown.new(self.to_s)
    end
  end
--- a/lib/treat/version.rb
+++ b/lib/treat/version.rb
@ -1,12 +1,12 @@
 module Treat
-  
+
  # The current version of Treat.
-  VERSION = "1.2.0"
+  VERSION = '2.1.0'
-  
+
  # Treat requires Ruby >= 1.9.2
  if RUBY_VERSION < '1.9.2'
    raise "Treat requires Ruby version 1.9.2 " +
    "or higher, but current is #{RUBY_VERSION}."
  end
-end
+end
--- a/lib/treat/workers/categorizable.rb
+++ b/lib/treat/workers/categorizable.rb
@ -1,51 +1,49 @@
 # This module creates all the worker categories
 # and the groups within these categories and adds
 # the relevant hooks on the appropriate entities.
-module Treat::Workers::Category
+module Treat::Workers::Categorizable
-  require_relative 'group'
+  require_relative 'groupable'
  # A lookup table for entity types.
  @@lookup = {}
  # Find a worker group based on method.
-  def self.lookup(method)
+  def lookup(method); @@lookup[method]; end
    @@lookup[method]
  end
-  def self.create_categories
+  def categorize!
    Treat.workers.members.each do |cat|
-      create_category(cat.
+      name = cat.capitalize.intern
-      capitalize.intern,
+      conf = load_category_conf(cat)
-      load_category_conf(cat))
+      create_category(name, conf)
    end
  end
-  def self.load_category_conf(name)
+  def load_category_conf(name)
-    config = Treat.workers[name]
+    if !Treat.workers.respond_to?(name)
    if config.nil?
      raise Treat::Exception,
      "The configuration file " +
      "for #{cat_sym} is missing."
    else
      Treat.workers[name]
    end
    config
  end
-  def self.create_category(name, conf)
+  def create_category(name, conf)
    category = Treat::Workers.
    const_set(name, Module.new)
    conf.each_pair do |group, worker|
      name = group.to_s.cc.intern
      category.module_eval do
-        @@methods = []; def methods; 
+        @@methods = []
-        @@methods; end; def groups; 
+        def methods; @@methods; end
-        self.constants; end
+        def groups; self.constants; end
      end
-      self.create_group(name, worker, category)
+      create_group(name, worker, category)
    end
  end
-  def self.create_group(name, conf, category)
+  def create_group(name, conf, category)
    group = category.const_set(name, Module.new)
    self.set_group_options(group, conf)
    self.bind_group_targets(group)
@ -54,27 +52,9 @@ module Treat::Workers::Category
    @@lookup[group.method] = group
  end
-  def self.bind_group_targets(group)
+  def set_group_options(group, conf)
    group.targets.each do |entity_type|
      entity = Treat::Entities.
      const_get(entity_type.cc)
      entity.class_eval do
        add_workers group
      end
    end
  end
  def self.register_group_presets(group, conf)
    return unless conf.respond_to? :presets
    conf.presets.each do |m|
      @@methods << m
      @@lookup[m] = group
    end
  end
  def self.set_group_options(group, conf)
    group.module_eval do
-      extend Treat::Workers::Group
+      extend Treat::Workers::Groupable
      self.type = conf.type
      self.targets = conf.targets
      if conf.respond_to?(:default)
@ -92,6 +72,22 @@ module Treat::Workers::Category
    end
  end
-  self.create_categories
+  def bind_group_targets(group)
    group.targets.each do |entity_type|
      entity = Treat::Entities.
      const_get(entity_type.cc)
      entity.class_eval do
        add_workers group
      end
    end
  end
  # Record every preset of the group's configuration
  # as a known method that looks up to this group.
  # Does nothing when the configuration has no presets.
  def register_group_presets(group, conf)
    if conf.respond_to?(:presets)
      conf.presets.each do |preset|
        @@methods << preset
        @@lookup[preset] = group
      end
    end
  end
 end
--- a/lib/treat/workers/extractors/distance/levenshtein.rb
+++ b/lib/treat/workers/extractors/distance/levenshtein.rb
@ -0,0 +1,35 @@
 # The C extension uses char* strings, and so Unicode strings 
 # will give incorrect distances. Need to provide a pure 
 # implementation if that's the case (FIX).
 class Treat::Workers::Extractors::Distance::Levenshtein
  require 'levenshtein'
  # Default costs for each edit operation.
  # NOTE(review): these are merged into the options
  # but are not passed to the library call below.
  DefaultOptions = {
    ins_cost: 1,
    del_cost: 1,
    sub_cost: 1
  }
  # Return the Levenshtein distance between the
  # string value of the entity and that of the
  # string/entity supplied in options[:to].
  #
  # Options:
  # - :to => string/entity to compare to (required).
  #
  # Raises Treat::Exception when :to is missing.
  def self.distance(entity, options = {})
    options = DefaultOptions.merge(options)
    unless options[:to]
      raise Treat::Exception, "Must supply " +
      "a string/entity to compare to using " +
      "the option :to for this worker."
    end
    a, b = entity.to_s, options[:to].to_s
    Levenshtein.distance(a, b)
  end
 end
--- a/lib/treat/workers/extractors/keywords/tf_idf.rb
+++ b/lib/treat/workers/extractors/keywords/tf_idf.rb
@ -23,19 +23,16 @@ class Treat::Workers::Extractors::Keywords::TfIdf
    tf_idfs = tf_idfs.
    sort_by {|k,v| v}.reverse
-
+   
    if tf_idfs.size <= options[:number]
      return tf_idfs
    end
    keywords = []
    i = 0
    max_count = tf_idfs.size < options[:number] ? tf_idfs.size : options[:number]
    tf_idfs.each do |word|
      w = word[0].to_s
      next if keywords.include?(w)
-      break if i > options[:number]
+      break if i > max_count
      keywords << w
      i += 1
--- a/lib/treat/workers/extractors/language/what_language.rb
+++ b/lib/treat/workers/extractors/language/what_language.rb
@ -1,9 +1,9 @@
 # Language detection using a probabilistic algorithm
-# that checks for the presence of words with Bloom 
+# that checks for the presence of words with Bloom
 # filters built from dictionaries for each language.
 #
-# Original paper: Grothoff. 2007. A Quick Introduction to 
+# Original paper: Grothoff. 2007. A Quick Introduction to
-# Bloom Filters. Department of Computer Sciences, Purdue 
+# Bloom Filters. Department of Computer Sciences, Purdue
 # University.
 class Treat::Workers::Extractors::Language::WhatLanguage
@ -35,7 +35,7 @@ class Treat::Workers::Extractors::Language::WhatLanguage
    options = DefaultOptions.merge(options)
-    @@detector ||= ::WhatLanguage.new(:possibilities)
+    @@detector ||= ::WhatLanguage.new(:all)
    possibilities = @@detector.process_text(entity.to_s)
    lang = {}
--- a/lib/treat/workers/extractors/name_tag/stanford.rb
+++ b/lib/treat/workers/extractors/name_tag/stanford.rb
@ -1,7 +1,7 @@
 # Named entity tag extraction using the Stanford NLP
 # Deterministic Coreference Resolver, which implements a
 # multi-pass sieve coreference resolution (or anaphora 
-# resolution) system.
+# resolution) system based on conditional random fields.
 #
 # Original paper: Heeyoung Lee, Yves Peirsman, Angel 
 # Chang, Nathanael Chambers, Mihai Surdeanu, Dan Jurafsky. 
@ -16,32 +16,24 @@ class Treat::Workers::Extractors::NameTag::Stanford
  def self.name_tag(entity, options = {})
    pp = nil
    language = entity.language
    Treat::Loaders::Stanford.load(language)
    isolated_token = entity.is_a?(Treat::Entities::Token)
    tokens = isolated_token ? [entity] : entity.tokens
-
+    
-    ms = StanfordCoreNLP::Config::Models[:ner][language]
+    unless classifier = @@classifiers[language]
-    model_path = Treat.libraries.stanford.model_path ||
+      model = Treat::Loaders::Stanford.find_model(:ner, language)
-    (Treat.paths.models + '/stanford/')
+      unless StanfordCoreNLP.const_defined?('CRFClassifier')
-    ms = model_path + '/' + 
+        StanfordCoreNLP.load_class('CRFClassifier', 'edu.stanford.nlp.ie.crf')
-    StanfordCoreNLP::Config::ModelFolders[:ner] +
+      end
-    ms['3class']
+      classifier = StanfordCoreNLP::CRFClassifier.getClassifier(model)
-
+      @@classifiers[language] = classifier
-    @@classifiers[language] ||=
+    end
-    StanfordCoreNLP::CRFClassifier.
+    
    getClassifier(ms)
    token_list = StanfordCoreNLP.get_list(tokens)
-    sentence = @@classifiers[language].
+    sentence = classifier.classify_sentence(token_list)
    classify_sentence(token_list)
    i = 0
    n = 0
    sentence.each do |s_token|
      tag = s_token.get(:answer).to_s.downcase
@ -49,14 +41,9 @@ class Treat::Workers::Extractors::NameTag::Stanford
      return tag if isolated_token
      if tag
        tokens[i].set :name_tag, tag
        n += 1
      end
      i += 1
    end
    entity.set :named_entity_count, n
    nil
  end
--- a/lib/treat/workers/extractors/similarity/jaro_winkler.rb
+++ b/lib/treat/workers/extractors/similarity/jaro_winkler.rb
@ -0,0 +1,38 @@
 # Similarity measure for short strings such as person names.
 # C extension won't work for Unicode strings; need to set 
 # extension to "pure" in that case (FIX).
 class Treat::Workers::Extractors::Similarity::JaroWinkler
  require 'fuzzystringmatch'
  # NOTE(review): :threshold is merged into the
  # options but is not used by the code below.
  DefaultOptions = {
    threshold: 0.7,
    implementation: nil
  }
  # One matcher per implementation, so a call asking
  # for a different :implementation does not silently
  # reuse the first matcher that was created.
  @@matchers = {}
  # Return the value computed by the matcher for the
  # string value of the entity and of options[:to].
  #
  # Options:
  # - :to => string/entity to compare to (required).
  # - :implementation => implementation passed to the
  #   matcher factory; when nil, :pure is used if
  #   JRUBY_VERSION is defined and :native otherwise.
  #
  # Raises Treat::Exception when :to is missing.
  def self.similarity(entity, options={})
    options = DefaultOptions.merge(options)
    unless options[:to]
      raise Treat::Exception, "Must supply " +
      "a string/entity to compare to using " +
      "the option :to for this worker."
    end
    impl = options[:implementation]
    impl ||= defined?(JRUBY_VERSION) ? :pure : :native
    @@matchers[impl] ||=
    FuzzyStringMatch::JaroWinkler.create(impl)
    a, b = entity.to_s, options[:to].to_s
    @@matchers[impl].getDistance(a, b)
  end
 end
--- a/lib/treat/workers/extractors/similarity/tf_idf.rb
+++ b/lib/treat/workers/extractors/similarity/tf_idf.rb
@ -0,0 +1,43 @@
 # Calculates the TF*IDF score of words.
 class Treat::Workers::Extractors::Similarity::TfIdf
  require 'tf-idf-similarity'
  # Currently always raises 'Not currently implemented.'
  # on its first statement, so every line after that
  # raise is unreachable.
  #
  # The unreachable remainder checks that options[:to]
  # is a document and that both documents have a parent
  # collection, builds a TfIdfSimilarity collection with
  # per-document term counts, and prints the similarity
  # matrix with puts (it does not return it).
  def self.similarity(entity, options={})
    raise 'Not currently implemented.'
    unless options[:to] && 
           options[:to].type == :document
      raise Treat::Exception, 'Must supply ' +
      'a document to compare to using ' +
      'the option :to for this worker.'
    end
    unless options[:to].parent_collection && 
           entity.parent_collection
      raise Treat::Exception, 'The TF*IDF ' +
      'similarity algorithm can only be applied ' +
      'to documents that are inside collections.' 
    end
    coll = TfIdfSimilarity::Collection.new
    entity.each_document do |doc|
      tdoc = TfIdfSimilarity::Document.new(doc.to_s)
      # Count lower-cased word values as floats.
      term_counts = Hash.new(0)
      doc.each_word do |word| 
        val = word.value.downcase
        term_counts[val] ||= 0.0
        term_counts[val] += 1.0
      end
      size = term_counts.values.reduce(:+)
      # Overwrite the document's own counts and size
      # with the values computed above.
      tdoc.instance_eval do
        @term_counts, @size = term_counts, size
      end
      coll << tdoc
    end
    puts coll.similarity_matrix.inspect
  end
 end
--- a/lib/treat/workers/extractors/time/kronic.rb
+++ b/lib/treat/workers/extractors/time/kronic.rb
@ -0,0 +1,20 @@
 # Time/date extraction using a simple rule-based library.
 # 
 # Supported formats: Today, yesterday, tomorrow, 
 # last thursday, this thursday, 14 Sep, 14 June 2010. 
 # Any dates without a year are assumed to be in the past.
 class Treat::Workers::Extractors::Time::Kronic
  require 'kronic'
  require 'date'
  # Return the date information contained within
  # the entity by parsing it with the 'kronic' gem.
  # Returns a DateTime, or nil when the parser does
  # not produce a date.
  #
  # Options: none.
  def self.time(entity, options = {})
    time = Kronic.parse(entity.to_s)
    # Kronic is documented to return Date objects,
    # which are not DateTime instances, so testing for
    # DateTime would reject every result. Accept any
    # Date and convert it to keep the DateTime result.
    time.is_a?(Date) ? time.to_datetime : nil
  end
 end
 end
--- a/lib/treat/workers/extractors/topic_words/lda.rb
+++ b/lib/treat/workers/extractors/topic_words/lda.rb
@ -53,9 +53,9 @@ class Treat::Workers::Extractors::TopicWords::LDA
    # Run the EM algorithm using random 
    # starting points
-    silence_stdout do
+    Treat.core.verbosity.silence ?
-      lda.em('random')
+    silence_stdout { lda.em('random') }  :
-    end
+    lda.em('random')
    # Load the vocabulary.
    if options[:vocabulary]
--- a/lib/treat/workers/formatters/readers/autoselect.rb
+++ b/lib/treat/workers/formatters/readers/autoselect.rb
@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Readers::Autoselect
  ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/
  ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']
  DefaultOptions = {
-    :default_to => 'txt'
+    :default_to => 'document'
  }
  # Choose a reader to use.
@ -12,7 +12,9 @@ class Treat::Workers::Formatters::Readers::Autoselect
  #  - (Symbol) :default_to => format to default to.
  def self.read(document, options = {})
    options = DefaultOptions.merge(options)
-    document.read(detect_format(document.file, options[:default_to]))
+    fmt = detect_format(document.file, options[:default_to])
    Treat::Workers::Formatters::Readers.
    const_get(fmt.cc).read(document,options)
  end
  def self.detect_format(filename, default_to = nil)
--- a/lib/treat/workers/formatters/readers/document.rb
+++ b/lib/treat/workers/formatters/readers/document.rb
@ -0,0 +1,17 @@
 require 'yomu'
 # This class is a wrapper for Yomu.
 # Yomu is a library for extracting text and metadata from files and documents
 # using the Apache Tika content analysis toolkit.
 class Treat::Workers::Formatters::Readers::Document
  # Fill the document's value with the text extracted
  # from its file and record the first extension of
  # the detected mimetype as its format.
  #
  # Options: none.
  def self.read(document, options = {})
    extractor = Yomu.new(document.file)
    document.value = extractor.text
    extension = extractor.mimetype.extensions.first
    document.set :format, extension
    document
  end
 end
--- a/lib/treat/workers/formatters/readers/html.rb
+++ b/lib/treat/workers/formatters/readers/html.rb
@ -11,7 +11,8 @@ class Treat::Workers::Formatters::Readers::HTML
  # By default, don't backup the original HTML
  DefaultOptions = {
    :keep_html => false,
-    :tags => %w[p div h1 h2 h3 ul ol dl dt li]
+    :tags => %w[p div h1 h2 h3 ul ol dl dt li img],
  }
  # Read the HTML document and strip it of its markup.
@ -46,6 +47,7 @@ class Treat::Workers::Formatters::Readers::HTML
      d = Readability::Document.new(html, options)
      document.value = "<h1>#{d.title}</h1>\n" + d.content
      document.set :format, 'html'
      document.set :images, d.images
    end
    document
--- a/lib/treat/workers/formatters/readers/image.rb
+++ b/lib/treat/workers/formatters/readers/image.rb
@ -7,8 +7,8 @@
 # statistical natural language modeling, and multi-
 # lingual capabilities."
 #
-# Original paper: Google Ocropus Engine: Breuel, 
+# Original paper: Google Ocropus Engine: Breuel,
-# Thomas M. The Ocropus Open Source OCR System. 
+# Thomas M. The Ocropus Open Source OCR System.
 # DFKI and U. Kaiserslautern, Germany.
 class Treat::Workers::Formatters::Readers::Image
@ -18,29 +18,31 @@ class Treat::Workers::Formatters::Readers::Image
  #
  # - (Boolean) :silent => whether to silence Ocropus.
  def self.read(document, options = {})
-    
+
    read = lambda do |doc|
      self.create_temp_dir do |tmp|
-        `ocropus book2pages #{tmp}/out #{doc.file}`
+        `ocropus-nlbin -o #{tmp}/out #{doc.file}`
-        `ocropus pages2lines #{tmp}/out`
+        `ocropus-gpageseg #{tmp}/out/????.bin.png --minscale 2`
-        `ocropus lines2fsts #{tmp}/out`
+        `ocropus-rpred #{tmp}/out/????/??????.bin.png`
-        `ocropus buildhtml #{tmp}/out > #{tmp}/output.html`
+        `ocropus-hocr #{tmp}/out/????.bin.png -o #{tmp}/book.html`
-        doc.set :file,  "#{tmp}/output.html"
+        doc.set :file,  "#{tmp}/book.html"
        doc.set :format, :html
        doc = doc.read(:html)
        doc.set :file, f
        doc.set :format, 'image'
      end
    end
-    
+
-    options[:silent] ? silence_stdout { 
+    Treat.core.verbosity.silence ? silence_stdout {
    read.call(document) } : read.call(document)
-    
+
    document
  end
-  
+
-  # Create a dire that gets deleted after execution of the block.
+  # Create a dir that gets deleted after execution of the block.
  def self.create_temp_dir(&block)
    if not FileTest.directory?(Treat.paths.tmp)
      FileUtils.mkdir(Treat.paths.tmp)
    end
    dname = Treat.paths.tmp +
    "#{Random.rand(10000000).to_s}"
    Dir.mkdir(dname)
@ -48,5 +50,5 @@ class Treat::Workers::Formatters::Readers::Image
  ensure
    FileUtils.rm_rf(dname)
  end
-  
+
 end
--- a/lib/treat/workers/formatters/readers/pdf.rb
+++ b/lib/treat/workers/formatters/readers/pdf.rb
@ -32,6 +32,9 @@ class Treat::Workers::Formatters::Readers::PDF
  # Create a temporary file which is deleted
  # after execution of the block.
  def self.create_temp_file(ext, value = nil, &block)
    if not FileTest.directory?(Treat.paths.tmp)
      FileUtils.mkdir(Treat.paths.tmp)
    end
    fname = Treat.paths.tmp + 
    "#{Random.rand(10000000).to_s}.#{ext}"
    File.open(fname, 'w') do |f|
--- a/lib/treat/workers/formatters/readers/xml.rb
+++ b/lib/treat/workers/formatters/readers/xml.rb
@ -30,7 +30,7 @@ class Treat::Workers::Formatters::Readers::XML
    @@xml_reader ||= StanfordCoreNLP.load(
    :tokenize, :ssplit, :cleanxml)
-    text = StanfordCoreNLP::Text.new(xml)
+    text = StanfordCoreNLP::Annotation.new(xml)
    @@xml_reader.annotate(text)
    text.get(:sentences).each do |sentence|
--- a/lib/treat/workers/formatters/serializers/xml.rb
+++ b/lib/treat/workers/formatters/serializers/xml.rb
@ -9,18 +9,19 @@ class Treat::Workers::Formatters::Serializers::XML
  # - (String) :file => a file to write to.
  def self.serialize(entity, options = {})
    options[:file] ||= (entity.id.to_s + '.xml')
-    if options[:indent].nil?
+    options[:indent] = 0
-      options = options.merge({:indent => 0})
+    enc = entity.to_s.encoding.to_s.downcase
-    end
+    string = "<?xml version=\"1.0\" " +
-    indent = options[:indent]
+    "encoding=\"#{enc}\" ?>\n<treat>\n"
-    if options[:indent] == 0
+    val = self.recurse(entity, options)
-      enc = entity.to_s.encoding.to_s.downcase
+    string += "#{val}\n</treat>"
-      string = "<?xml version=\"1.0\" " +
+    File.open(options[:file], 'w') do |f|
-      "encoding=\"#{enc}\" ?>\n<treat>\n"
+      f.write(string)
-    else
+    end; return options[:file]
-      string = ''
+  end
-    end
+  
-    spaces = ''
+  def self.recurse(entity, options)
    spaces, string = '', ''
    options[:indent].times { spaces << ' ' }
    attributes = " id='#{entity.id}'"
    if !entity.features.nil? && entity.features.size != 0
@ -56,27 +57,16 @@ class Treat::Workers::Formatters::Serializers::XML
    if entity.has_children?
      options[:indent] += 1
      entity.children.each do |child|
-        string =
+        string += self.recurse(child, options)
        string +
        serialize(child, options)
      end
      options[:indent] -= 1
    else
-      string = string + "#{escape(entity.value)}"
+      string += "#{escape(entity.value)}"
    end
    unless entity.is_a?(Treat::Entities::Token)
      string += "#{spaces}"
    end
    string += "</#{tag}>\n"
    if indent == 0
      string += "\n</treat>"
      if options[:file]
        File.open(options[:file], 'w') do |f|
          f.write(string)
        end
      end
    end
    options[:file]
  end
  def self.escape(input)
--- a/lib/treat/workers/formatters/serializers/yaml.rb
+++ b/lib/treat/workers/formatters/serializers/yaml.rb
@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Serializers::YAML
  silence_warnings do
    # Require the Psych YAML serializer.
-    require 'psych'
+    require 'yaml'
  end
  # Serialize an entity in YAML format.
@ -11,7 +11,7 @@ class Treat::Workers::Formatters::Serializers::YAML
  # Options:
  # - (String) :file => a file to write to.
  def self.serialize(entity, options = {})
-    yaml = ::Psych.dump(entity)
+    yaml = ::YAML.dump(entity)
    options[:file] ||= (entity.id.to_s + '.yml')
    if options[:file]
      File.open(options[:file], 'w') do |f| 
--- a/lib/treat/workers/formatters/unserializers/mongo.rb
+++ b/lib/treat/workers/formatters/unserializers/mongo.rb
@ -17,7 +17,7 @@ class Treat::Workers::Formatters::Unserializers::Mongo
    @@database ||= Mongo::Connection.
    new(Treat.databases.mongo.host).
-    db(Treat.databases.mongo.db || db)
+    db(db || Treat.databases.mongo.db)
    supertype =  Treat::Entities.const_get(
    entity.type.to_s.capitalize.intern).superclass.mn.downcase
--- a/lib/treat/workers/formatters/unserializers/xml.rb
+++ b/lib/treat/workers/formatters/unserializers/xml.rb
@ -65,6 +65,7 @@ class Treat::Workers::Formatters::Unserializers::XML
            value = v
          else
            v = v[1..-1].intern if v[0] == ':'
            v = ":".intern if v == :''
            v = v.to_i if v =~ /^[0-9]*$/
            v = v.to_f if v =~ /^[0-9\.]*$/
            v = false if v == 'false'
--- a/lib/treat/workers/formatters/unserializers/yaml.rb
+++ b/lib/treat/workers/formatters/unserializers/yaml.rb
@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Unserializers::YAML
  silence_warnings do
    # Require the Psych YAML parser.
-    require 'psych'
+    require 'yaml'
  end
  # Require date to revive DateTime.
@ -13,7 +13,7 @@ class Treat::Workers::Formatters::Unserializers::YAML
  #
  # Options: none.
  def self.unserialize(document, options = {})
-    document << ::Psych.load(
+    document << ::YAML.load(
    File.read(document.file))
    document
  end
--- a/lib/treat/workers/groupable.rb
+++ b/lib/treat/workers/groupable.rb
@ -1,11 +1,12 @@
-module Treat::Workers::Group
+module Treat::Workers::Groupable
  # Lazily load the worker classes in the group.
  def const_missing(const)
    bits = self.ancestors[0].to_s.split('::')
    bits.collect! { |bit| bit.ucc }
    file = bits.join('/') + "/#{const.ucc}"
-    if not File.readable?(Treat.paths.lib + "#{file}.rb")
+    path = Treat.paths.lib + "#{file}.rb"
    if not File.readable?(path)
      raise Treat::Exception,
      "File '#{file}.rb' corresponding to " +
      "requested worker #{self}::#{const} " +
@ -14,7 +15,7 @@ module Treat::Workers::Group
      require file
      if not self.const_defined?(const)
        raise Treat::Exception,
-        "File #{file} does not define " +
+        "File #{file}.rb does not define " +
        "#{self}::#{const}."
      end
      const_get(const)
@ -69,9 +70,7 @@ module Treat::Workers::Group
  # Get constants in this module, excluding by
  # default those defined by parent modules.
-  def const_get(const)
+  def const_get(const); super(const, false); end
    super(const, false)
  end
  # Modify the extended class.
  def self.extended(group)
--- a/lib/treat/workers/inflectors/cardinalizers/linguistics.rb
+++ b/lib/treat/workers/inflectors/cardinalizers/linguistics.rb
@ -35,9 +35,9 @@ class Treat::Workers::Inflectors::Cardinalizers::Linguistics
  # More specific options when using :type => :ordinal:
  def self.cardinal(entity, options = {})
    options = DefaultOptions.merge(options)
-    Treat::Loaders::Linguistics.
+    lang = entity.language
-    load(options[:language]).
+    code = Treat::Loaders::Linguistics.load(lang)
-    numwords(entity.to_s, options)
+    entity.to_s.send(code).numwords(options)
  end
 end
--- a/lib/treat/workers/inflectors/conjugators/linguistics.rb
+++ b/lib/treat/workers/inflectors/conjugators/linguistics.rb
@ -35,13 +35,15 @@ module Treat::Workers::Inflectors::Conjugators::Linguistics
    options = Forms[options[:form].to_s] if options[:form]
-    klass = Treat::Loaders::Linguistics.load(entity.language)
+    code = Treat::Loaders::Linguistics.load(entity.language)
    obj = entity.to_s.send(code)
    if options[:mode] == 'infinitive'
-      silence_warnings { klass.infinitive(entity.to_s) }
+      obj.infinitive
    elsif options[:mode] == 'participle' && options[:tense] == 'present'
-      silence_warnings { klass.present_participle(entity.to_s) }
+      obj.present_participle
    elsif options[:count] == 'plural' && options.size == 1
-      silence_warnings { klass.plural_verb(entity.to_s) }
+      obj.plural_verb
    else
      raise Treat::Exception,
      'This combination of modes, tenses, persons ' +
--- a/lib/treat/workers/inflectors/declensors/english.rb
+++ b/lib/treat/workers/inflectors/declensors/english.rb
@ -21,9 +21,9 @@ class Treat::Workers::Inflectors::Declensors::English
      'option count ("singular" or "plural").'
    end
    string = entity.to_s
-    if options[:count] == 'plural'
+    if options[:count].to_s == 'plural'
      Inflect.plural(string)
-    elsif options[:count] == 'singular'
+    elsif options[:count].to_s == 'singular'
      Inflect.singular(string)
    end
  end
--- a/lib/treat/workers/inflectors/declensors/linguistics.rb
+++ b/lib/treat/workers/inflectors/declensors/linguistics.rb
@ -17,34 +17,27 @@ class Treat::Workers::Inflectors::Declensors::Linguistics
    cat = entity.get(:category)
    return if cat && !POS.include?(cat)
    unless options[:count]
      raise Treat::Exception, 'Must supply ' +
      ':count option ("singular" or "plural").'
    end
    klass = Treat::Loaders::
    Linguistics.load(entity.language)
    string = entity.to_s
-    if options[:count] == 'plural'
+    unless options[:count].to_s == 'plural'
      if (entity.has?(:category))
        result = ''
        silence_warnings do
          result = klass.send(
          :"plural_#{entity.category}",
          string)
        end
        return result
      else
        return klass.plural(string)
      end
    else
      raise Treat::Exception,
      "Ruby Linguistics does not support " +
      "singularization of words."
    end
    lang = entity.language
    code = Treat::Loaders::Linguistics.load(lang)
    obj = entity.to_s.send(code)
    if cat = entity.get(:category)
      method = "plural_#{cat}"
      obj.send(method)
    else; obj.plural; end
  end
 end
--- a/lib/treat/workers/inflectors/ordinalizers/linguistics.rb
+++ b/lib/treat/workers/inflectors/ordinalizers/linguistics.rb
@ -11,11 +11,11 @@ class Treat::Workers::Inflectors::Ordinalizers::Linguistics
  # Describe a number in words in ordinal form, using the
  # 'linguistics' gem.
-  def self.ordinal(number, options = {})
+  def self.ordinal(entity, options = {})
    options = DefaultOptions.merge(options)
-    klass = Treat::Loaders::
+    lang = entity.language
-    Linguistics.load(options[:language])
+    code = Treat::Loaders::Linguistics.load(lang)
-    klass.ordinate(number.to_s)
+    entity.to_s.send(code).ordinate
  end
 end
--- a/lib/treat/workers/learners/classifiers/id3.rb
+++ b/lib/treat/workers/learners/classifiers/id3.rb
@ -12,24 +12,20 @@ class Treat::Workers::Learners::Classifiers::ID3
  @@classifiers = {}
  def self.classify(entity, options = {})
-    
+    dset = options[:training]
-    set = options[:training]
+    prob = dset.problem
-    cl = set.problem
+    if !@@classifiers[prob]
    if !@@classifiers[cl]
      dec_tree = DecisionTree::ID3Tree.new(
-      cl.feature_labels.map { |l| l.to_s }, 
+      prob.feature_labels.map { |l| l.to_s }, 
-      set.items.map { |i| i[:features]}, 
+      dset.items.map { |i| i[:features] }, 
-      cl.question.default, cl.question.type)
+      prob.question.default, prob.question.type)
      dec_tree.train
-      @@classifiers[cl] = dec_tree
+      @@classifiers[prob] = dec_tree
    else
-      dec_tree = @@classifiers[cl]
+      dec_tree = @@classifiers[prob]
      dec_tree.graph('testingbitch')
    end
-    dec_tree.predict(
+    vect = prob.export_features(entity, false)
-      cl.export_features(entity, false)
+    dec_tree.predict(vect)
    )
  end
 end
--- a/lib/treat/workers/learners/classifiers/linear.rb
+++ b/lib/treat/workers/learners/classifiers/linear.rb
@ -11,35 +11,23 @@ class Treat::Workers::Learners::Classifiers::Linear
  }
  def self.classify(entity, options = {})
    options = DefaultOptions.merge(options)
-    set = options[:training]
+    dset = options[:training]
-    problem = set.problem
+    prob, items = dset.problem, dset.items
-    
+    if !@@classifiers[prob]
-    if !@@classifiers[problem]
+      lparam = LParameter.new
-      labels = problem.question.labels
+      lparam.solver_type = options[:solver_type]
-      unless labels
+      lparam.eps = options[:eps]
-        raise Treat::Exception,
+      lbls = items.map { |it| it[:features][-1] }
-        "LibLinear requires that you provide the possible " +
+      exs = items.map { |it| it[:features][0..-2] }.
-        "labels to assign to classification items when " +
+      map { |ary| self.array_to_hash(ary) }
-        "specifying the question."
+      lprob = LProblem.new(lbls, exs, options[:bias])
-      end
+      model = LModel.new(lprob, lparam)
-      param = LParameter.new
+      @@classifiers[prob] = model
      param.solver_type = options[:solver_type]
      param.eps = options[:eps]
      bias = options[:bias]
      data = set.items.map do |item| 
        self.array_to_hash(item[:features]) 
      end
      prob = LProblem.new(labels, data, bias)
      @@classifiers[problem] = 
      LModel.new(prob, param)
    end
-
+    features = prob.export_features(entity, false)
-    @@classifiers[problem].predict(
+    @@classifiers[prob].predict(
-    self.array_to_hash(problem.
+    self.array_to_hash(features))
    export_features(entity, false)))
  end
  def self.array_to_hash(array)
--- a/lib/treat/workers/learners/classifiers/mlp.rb
+++ b/lib/treat/workers/learners/classifiers/mlp.rb
@ -1,30 +1,43 @@
 # Classification based on a multilayer perceptron.
 class Treat::Workers::Learners::Classifiers::MLP
-  require 'ai4r'
+  require 'ruby_fann/neural_network'
-  @@mlps = {}
+  DefaultOptions = {
    num_inputs: 3,
    hidden_neurons: [2, 8, 4, 3, 4],
    num_outputs: 1,
    max_neurons: 1000,
    neurons_between_reports: 1,
    desired_error: 0.1
  }
  @@classifiers = {}
  def self.classify(entity, options = {})
-    
+    options = DefaultOptions.merge(options)
-    set = options[:training]
+    dset = options[:training]
-    cl = set.problem
+    prob, items = dset.problem, dset.items
-      
+    if !@@classifiers[prob]
-    if !@@mlps[cl]
+      fann = RubyFann::Standard.new(options)
-      net = Ai4r::NeuralNetwork::Backpropagation.new(
+      inputs = items.map { |it| it[:features][0..-2] }
-      [cl.feature_labels.size, 3, 1])
+      outputs = items.map { |it| [it[:features][-1]] }
-      set.items.each do |item|
+      training = silence_stdout do 
-        inputs = item[:features][0..-2]
+        RubyFann::TrainData.new(inputs: 
-        outputs = [item[:features][-1]]
+        inputs, desired_outputs: outputs)
        net.train(inputs, outputs)
      end
-      @@mlps[cl] = net
+      params = [options[:max_neurons],
      options[:neurons_between_reports],
      options[:desired_error]]
      fann.train_on_data(training, *params)
      @@classifiers[prob] = fann
    else
-      net = @@mlps[cl]
+      fann = @@classifiers[prob]
    end
-    
+    vect = prob.export_features(entity, false)
-    net.eval(cl.export_features(entity, false))[0]
+    Treat.core.verbosity.silence ? 
-    
+    silence_stdout { fann.run(vect)[0] } :
    fann.run(vect)[0]
  end
 end
--- a/lib/treat/workers/learners/classifiers/svm.rb
+++ b/lib/treat/workers/learners/classifiers/svm.rb
@ -5,7 +5,7 @@ class Treat::Workers::Learners::Classifiers::SVM
  @@classifiers = {}
  DefaultOptions = {
-    cache_size: 1,
+    cache_size: 1, # in MB
    eps: 0.001,
    c: 10
  }
@ -14,35 +14,25 @@ class Treat::Workers::Learners::Classifiers::SVM
  # - (Numeric) :eps => tolerance of termination criterion
  # - (Numeric) :c => C parameter
  def self.classify(entity, options = {})
    options = DefaultOptions.merge(options)
-    set = options[:training]
+    dset = options[:training]
-    problem = set.problem
+    prob, items = dset.problem, dset.items
-    
+    if !@@classifiers[prob]
-    if !@@classifiers[problem]
+      lprob = Libsvm::Problem.new
-      labels = problem.question.labels
+      lparam = Libsvm::SvmParameter.new
-      unless labels
+      lparam.cache_size = options[:cache_size]
-        raise Treat::Exception,
+      lparam.eps = options[:eps]
-        "LibSVM requires that you provide the possible " +
+      lparam.c = options[:c]
-        "labels to assign to classification items when " +
+      llabels = items.map { |it| it[:features][-1] }
-        "specifying the question."
+      lexamples = items.map { |it| it[:features][0..-2] }.
-      end
+      map { |ary| Libsvm::Node.features(ary) }
-      examples = set.items.map  { |item| item[:features] }
+      lprob.set_examples(llabels, lexamples)
-      prob = Libsvm::Problem.new
+      model = Libsvm::Model.train(lprob, lparam)
-      prob.set_examples(labels, examples)
+      @@classifiers[prob] = model
      param = Libsvm::SvmParameter.new
      param.cache_size = options[:cache_size]
      param.eps = options[:eps]
      param.c = options[:c]
      model = Libsvm::Model.train(problem, parameter)
      @@classifiers[problem] = model
    end
-    
+    features = prob.export_features(entity, false)
-    features = problem.export_features(entity, false)
+    @@classifiers[prob].predict(
-    
+    Libsvm::Node.features(features))
    @@classifiers[problem].predict(
    Libsvm::Node.features(*features))
  end
 end
--- a/lib/treat/workers/lexicalizers/categorizers/from_tag.rb
+++ b/lib/treat/workers/lexicalizers/categorizers/from_tag.rb
@ -28,8 +28,9 @@ class Treat::Workers::Lexicalizers::Categorizers::FromTag
    tag = entity.check_has(:tag)
-    return 'unknown' if tag.nil? || tag == '' || entity.type == :symbol
+    return 'unknown' if tag.nil? || tag == ''
-    return 'sentence' if tag == 'S' || entity.type == :sentence
+    return 'fragment' if tag == 'F'
    return 'sentence' if tag == 'S'
    return 'number' if entity.type == :number
    return Ptc[entity.to_s] if entity.type == :punctuation
--- a/lib/treat/workers/lexicalizers/sensers/wordnet.rb
+++ b/lib/treat/workers/lexicalizers/sensers/wordnet.rb
@ -1,62 +1,79 @@
 # Sense information (synonyms, antonyms, hypernyms
 # and hyponyms) obtained through a Ruby parser that
 # accesses Wordnet flat files.
-# 
+#
-# Original paper: George A. Miller (1995). WordNet: 
+# Original paper: George A. Miller (1995). WordNet:
-# A Lexical Database for English. Communications of 
+# A Lexical Database for English. Communications of
 # the ACM Vol. 38, No. 11: 39-41.
 class Treat::Workers::Lexicalizers::Sensers::Wordnet
  # Require the 'wordnet' gem (install as 'rwordnet').
  require 'wordnet'
-  
+
  # Patch for bug.
  ::WordNet.module_eval do
-    remove_const(:SynsetType)
+    remove_const(:SYNSET_TYPES)
-    const_set(:SynsetType, 
+    const_set(:SYNSET_TYPES,
    {"n" => "noun", "v" => "verb", "a" => "adj"})
  end
-  
+
  # Require an adaptor for Wordnet synsets.
  require_relative 'wordnet/synset'
-  
+
  # Noun, adjective and verb indexes.
  @@indexes = {}
  # Obtain lexical information about a word using the
  # ruby 'wordnet' gem.
  def self.sense(word, options = nil)
-    
+
    category = word.check_has(:category)
-    
+
-    unless options[:nym]
+    if !options[:nym]
      raise Treat::Exception, "You must supply " +
-      "the :nym option (:synonym, :hypernym, etc.)"
+      "the :nym option ('synonyms', 'hypernyms', etc.)"
    end
-    
+
    if !options[:nym].is_a?(Symbol)
      options[:nym] = options[:nym].intern
    end
    if ![:synonyms, :antonyms,
      :hypernyms, :hyponyms].include?(options[:nym])
      raise Treat::Exception, "You must supply " +
      "a valid :nym option ('synonyms', 'hypernyms', etc.)"
    end
    unless ['noun', 'adjective', 'verb'].
      include?(word.category)
      return []
    end
-    
+
-    cat = category.to_s.capitalize
+    cat = abbreviate(category)
-    
+
-    @@indexes[cat] ||= 
+    lemma = ::WordNet::Lemma.find(word.value.downcase, cat)
    ::WordNet.const_get(cat + 'Index').instance
    lemma = @@indexes[cat].find(word.value.downcase)
    return [] if lemma.nil?
    synsets = []
-    
+
    lemma.synsets.each do |synset|
-      synsets << 
+      synsets <<
      Treat::Workers::Lexicalizers::Sensers::Wordnet::Synset.new(synset)
    end
-    
+
    ((synsets.collect do |ss|
      ss.send(options[:nym])
-    end - [word.value]).flatten).uniq
+    end - [word.value]).
-    
+    flatten).uniq.map do |token|
      token.gsub('_', ' ')
    end
  end
-end
+  def self.abbreviate category
    if category == 'adjective'
      :adj
    elsif category == 'adverb'
      :adv
    else
      category.to_sym
    end
  end
 end
--- a/lib/treat/workers/lexicalizers/taggers/brill.rb
+++ b/lib/treat/workers/lexicalizers/taggers/brill.rb
@ -40,15 +40,15 @@ class Treat::Workers::Lexicalizers::Taggers::Brill
      return pair[1] if isolated_token
    end
-    if entity.is_a?(Treat::Entities::Sentence) ||
+    if entity.is_a?(Treat::Entities::Group) && 
-      (entity.is_a?(Treat::Entities::Phrase) && 
+      !entity.parent_sentence
      !entity.parent_sentence)
        entity.set :tag_set, :penn
    end
    return 'S' if entity.is_a?(Treat::Entities::Sentence)
    return 'P' if entity.is_a?(Treat::Entities::Phrase)
-
+    return 'F' if entity.is_a?(Treat::Entities::Fragment)
    return 'G' if entity.is_a?(Treat::Entities::Group)
  end
 end
--- a/lib/treat/workers/lexicalizers/taggers/lingua.rb
+++ b/lib/treat/workers/lexicalizers/taggers/lingua.rb
@ -61,15 +61,16 @@ class Treat::Workers::Lexicalizers::Taggers::Lingua
    end
-    if entity.is_a?(Treat::Entities::Sentence) ||
+    if entity.is_a?(Treat::Entities::Group) && 
-      (entity.is_a?(Treat::Entities::Phrase) && 
+      !entity.parent_sentence
      !entity.parent_sentence)
        entity.set :tag_set, :penn
    end
-
+    
    return 'S' if entity.is_a?(Treat::Entities::Sentence)
    return 'P' if entity.is_a?(Treat::Entities::Phrase)
-    
+    return 'F' if entity.is_a?(Treat::Entities::Fragment)
    return 'G' if entity.is_a?(Treat::Entities::Group)
  end
 end
--- a/lib/treat/workers/lexicalizers/taggers/stanford.rb
+++ b/lib/treat/workers/lexicalizers/taggers/stanford.rb
@ -1,15 +1,15 @@
-# POS tagging using (i) explicit use of both preceding 
+# POS tagging using a maximum entropy model, with (i) 
-# and following tag contexts via a dependency network 
+# explicit use of both preceding and following tag 
-# representation, (ii) broad use of lexical features, 
+# contexts via a dependency network representation, 
-# including jointly conditioning on multiple consecutive 
+# (ii) broad use of lexical features, including jointly 
-# words, (iii) effective use of priors in conditional 
+# conditioning on multiple consecutive words, (iii) 
-# loglinear models, and (iv) ﬁne-grained modeling of 
+# effective use of priors in conditional loglinear models, 
-# unknown word features.
+# and (iv) ﬁne-grained modeling of unknown word features.
 #
 # Original paper: Toutanova, Manning, Klein and Singer.
-# 2003. Feature-Rich Part-of-Speech Tagging with a 
+# 2003. Feature-Rich Part-of-Speech Tagging with a
-# Cyclic Dependency Network. In Proceedings of the 
+# Cyclic Dependency Network. In Proceedings of the
-# Conference of the North American Chapter of the 
+# Conference of the North American Chapter of the
 # Association for Computational Linguistics.
 class Treat::Workers::Lexicalizers::Taggers::Stanford
@ -25,34 +25,32 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
  def self.tag(entity, options = {})
    # Handle tags for sentences and phrases.
-    if entity.is_a?(Treat::Entities::Sentence) ||
+    if entity.is_a?(Treat::Entities::Group) &&
-      (entity.is_a?(Treat::Entities::Phrase) &&
+      !entity.parent_sentence
      !entity.parent_sentence)
      tag_set = options[:tag_set]
      entity.set :tag_set, tag_set
    end
-    if entity.is_a?(Treat::Entities::Sentence)
+    return 'S' if entity.is_a?(Treat::Entities::Sentence)
-      return 'S'
+    return 'P' if entity.is_a?(Treat::Entities::Phrase)
-    elsif entity.is_a?(Treat::Entities::Phrase)
+    return 'F' if entity.is_a?(Treat::Entities::Fragment)
-      return 'P'
+    return 'G' if entity.is_a?(Treat::Entities::Group)
-    end
+
    # Handle options and initialize the tagger.
-    lang = entity.language
+    lang = entity.language.intern
    options = get_options(options, lang)
    init_tagger(lang) unless @@taggers[lang]
-    tokens, list = get_token_list(entity)
+    options = get_options(options, lang)
    tokens, t_list = get_token_list(entity)
    # Do the tagging.
    i = 0
    isolated_token = entity.is_a?(Treat::Entities::Token)
-    @@taggers[lang].apply(list).each do |tok|
+    @@taggers[lang].apply(t_list).each do |tok|
-      tokens[i].set :tag, tok.tag
+      tokens[i].set(:tag, tok.tag.split('-').first)
-      tokens[i].set :tag_set,
+      tokens[i].set(:tag_set,
-      options[:tag_set] if isolated_token
+      options[:tag_set]) if isolated_token
      return tok.tag if isolated_token
      i += 1
    end
@ -61,21 +59,24 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
  # Initialize the tagger for a language.
  def self.init_tagger(language)
-    Treat::Loaders::Stanford.load(language)
+    unless @@taggers[language]
-    model = StanfordCoreNLP::Config::Models[:pos][language]
+      Treat::Loaders::Stanford.load(language)
-    model_path = Treat.libraries.stanford.model_path || 
+      unless StanfordCoreNLP.const_defined?('MaxentTagger')
-    Treat.paths.models + 'stanford/'
+        StanfordCoreNLP.load_class('MaxentTagger', 
-    model = model_path + StanfordCoreNLP::
+        'edu.stanford.nlp.tagger.maxent')
-    Config::ModelFolders[:pos] + model
+      end
-    @@taggers[language] ||=
+      model = Treat::Loaders::Stanford.find_model(:pos,language)
-    StanfordCoreNLP::MaxentTagger.new(model)
+      tagger = StanfordCoreNLP::MaxentTagger.new(model)
      @@taggers[language] = tagger
    end
    @@taggers[language]
  end
  # Handle the options for the tagger.
  def self.get_options(options, language)
    options = DefaultOptions.merge(options)
    if options[:tagger_model]
-      ::StanfordCoreNLP.set_model('pos.model',
+      StanfordCoreNLP.set_model('pos.model',
      options[:tagger_model])
    end
    options[:tag_set] =
--- a/lib/treat/workers/processors/chunkers/autoselect.rb
+++ b/lib/treat/workers/processors/chunkers/autoselect.rb
@ -2,16 +2,13 @@ class Treat::Workers::Processors::Chunkers::Autoselect
  def self.chunk(entity, options = {})
    unless entity.has?(:format)
-      raise Treat::Exception,
+      entity.set :format, 'txt'
      "Must have a format to autoselect chunker."
    end
    begin
-      k = Treat::Workers::Processors::
+      k = Treat::Workers::Processors::Chunkers.const_get(entity.format.cc)
      Chunkers.const_get(entity.format.cc)
      k.chunk(entity, options)
    rescue Treat::Exception
-      Treat::Workers::Processors::
+      Treat::Workers::Processors::Chunkers::TXT.chunk(entity, options)
      Chunkers::TXT.chunk(entity, options)
    end
  end
--- a/lib/treat/workers/processors/chunkers/html.rb
+++ b/lib/treat/workers/processors/chunkers/html.rb
@ -3,12 +3,9 @@ class Treat::Workers::Processors::Chunkers::HTML
  require 'nokogiri'
  def self.chunk(entity, options = {})
    entity.check_hasnt_children
    doc = Nokogiri::HTML(entity.value)
-    recurse(entity, doc)
+    self.recurse(entity, doc)
  end
  def self.recurse(node, html_node, level = 1)
@ -16,7 +13,6 @@ class Treat::Workers::Processors::Chunkers::HTML
    html_node.children.each do |child|
      next if child.name == 'text'
      txt = child.inner_text
      if child.name =~ /^h([0-9]{1})$/ ||
--- a/lib/treat/workers/processors/chunkers/txt.rb
+++ b/lib/treat/workers/processors/chunkers/txt.rb
@ -12,16 +12,13 @@ class Treat::Workers::Processors::Chunkers::TXT
    zones.each do |zone|
      zone.strip!
      next if zone == ''
-      c = Treat::Entities::
+      c = Treat::Entities::Zone.from_string(zone)
      Zone.from_string(zone)
      if c.type == :title
        if current.type == :section
          current = current.parent
-          current = entity << Treat::
+          current = entity << Treat::Entities::Section.new
          Entities::Section.new
        else
-          current = entity << Treat::
+          current = entity << Treat::Entities::Section.new
          Entities::Section.new
        end
      end
      current << c
--- a/lib/treat/workers/processors/parsers/stanford.rb
+++ b/lib/treat/workers/processors/parsers/stanford.rb
@ -1,150 +1,88 @@
-# Parsing using an interface to a Java implementation 
+# Parsing using an interface to a Java implementation
-# of probabilistic natural language parsers, both 
+# of probabilistic natural language parsers, both
-# optimized PCFG and lexicalized dependency parsers, 
+# optimized PCFG and lexicalized dependency parsers,
-# and a lexicalized PCFG parser. 
+# and a lexicalized PCFG parser.
-# 
+#
-# Original paper: Dan Klein and Christopher D. 
+# Original paper: Dan Klein and Christopher D.
-# Manning. 2003. Accurate Unlexicalized Parsing. 
+# Manning. 2003. Accurate Unlexicalized Parsing.
-# Proceedings of the 41st Meeting of the Association 
+# Proceedings of the 41st Meeting of the Association
 # for Computational Linguistics, pp. 423-430.
 class Treat::Workers::Processors::Parsers::Stanford
-  
+
  Pttc = Treat.tags.aligned.phrase_tags_to_category
-  
+
  # Hold one instance of the pipeline per language.
  @@parsers = {}
-  DefaultOptions = {
+  DefaultOptions = { model: nil }
    :parser_model => nil,
    :tagger_model => nil
  }
  # Parse the entity using the Stanford parser.
  #
  # Options:
  #
  # - (Boolean) :silent => whether to silence the output
  #   of the JVM.
  # - (String) :log_file => a filename to log output to
  # instead of displaying it.
  def self.parse(entity, options = {})
-    entity.check_hasnt_children
+    val, lang = entity.to_s, entity.language.intern
-    val = entity.to_s
+    Treat::Loaders::Stanford.load(lang)
    lang = entity.language
    init(lang, options)
    tag_set = StanfordCoreNLP::Config::TagSets[lang]
-    text = ::StanfordCoreNLP::Text.new(val)
+    list = get_token_list(entity)
-    @@parsers[lang].annotate(text)
+    entity.remove_all!
-
+    
-    text.get(:sentences).each do |s|
+    model_file     = options[:model] || 
-      
+    StanfordCoreNLP::Config::Models[:parse][lang]
-      if entity.is_a?(Treat::Entities::Sentence) ||
+    
-        entity.is_a?(Treat::Entities::Phrase)
+    unless @@parsers[lang] && @@parsers[lang][model_file]
-        tag = s.get(:category).to_s
+      model_path   = Treat.libraries.stanford.model_path ||
-        tag_s, tag_opt = *tag.split('-')
+                     StanfordCoreNLP.model_path
-        tag_s ||= 'S'
+      model_folder = StanfordCoreNLP::Config::ModelFolders[:parse]
-        entity.set :tag, tag_s
+      model = File.join(model_path, model_folder, model_file)
-        entity.set :tag_opt, tag_opt if tag_opt
+      @@parsers[lang] ||= {}
-        recurse(s.get(:tree).children[0], entity, tag_set)
+      options = StanfordCoreNLP::Options.new
-        break #######
+      parser = StanfordCoreNLP::LexicalizedParser
-      else
+      .getParserFromFile(model, options)
-        recurse(s.get(:tree), entity, tag_set)
+      @@parsers[lang][model_file] = parser
      end
    end
-
+    
    parser = @@parsers[lang][model_file]
    text = parser.apply(list)
    recurse(text.children[0], entity, tag_set)
    entity.set :tag_set, tag_set
-    
+
  end
-  def self.init(lang, options)
+  def self.recurse(java_node, ruby_node, tag_set)
    return if @@parsers[lang]
-    Treat::Loaders::Stanford.load(lang)
+    java_node.children.each do |java_child|
    options = DefaultOptions.merge(options)
    StanfordCoreNLP.use(lang)
    if options[:tagger_model]
      ::StanfordCoreNLP.set_model(
      'pos.model', options[:tagger_model]
      )
    end
    if options[:parser_model]
      ::StanfordCoreNLP.set_model(
      'parser.model', options[:parser_model]
      )
    end
    @@parsers[lang] ||=
    ::StanfordCoreNLP.load(
    :tokenize, :ssplit, :pos, :lemma, :parse
    )
  end
-  # Helper method which recurses the tree supplied by
+      label = java_child.label
-  # the Stanford parser.
+      tag = label.get(:category).to_s
  def self.recurse(java_node, ruby_node, tag_set, additional_tags = [])
-    if java_node.num_children == 0
+      if Pttc[tag] && Pttc[tag][tag_set]
-
+        ruby_child = Treat::Entities::Phrase.new
-      label = java_node.label
+        ruby_child.set :tag, tag
      tag = label.get(:part_of_speech).to_s
      tag_s, tag_opt = *tag.split('-')
      tag_s ||= ''
      ruby_node.value = java_node.value.to_s.strip
      ruby_node.set :tag, tag_s
      ruby_node.set :tag_opt, tag_opt if tag_opt
      ruby_node.set :lemma, label.get(:lemma).to_s
      additional_tags.each do |t|
        lt = label.get(t)
        ruby_node.set t, lt.to_s if lt
      end
      ruby_node
    else
      if java_node.num_children == 1 &&
        java_node.children[0].num_children == 0
        recurse(java_node.children[0],
        ruby_node, tag_set, additional_tags)
        return
      end
      java_node.children.each do |java_child|
        label = java_child.label
        tag = label.get(:category).to_s
        tag_s, tag_opt = *tag.split('-')
        tag_s ||= ''
        if Pttc[tag_s] && Pttc[tag_s][tag_set]
          ruby_child = Treat::Entities::Phrase.new
        else
          l = java_child.children[0].to_s
          v = java_child.children[0].value.to_s.strip
          # Mhmhmhmhmhm
          val = (l == v) ? v :  l.split(' ')[-1].gsub(')', '')
          ruby_child = Treat::Entities::Token.from_string(val)
        end
        ruby_child.set :tag, tag_s
        ruby_child.set :tag_opt, tag_opt if tag_opt
        ruby_node << ruby_child
        unless java_child.children.empty?
-          recurse(java_child, ruby_child, tag_set, additional_tags)
+          recurse(java_child, ruby_child, tag_set)
        end
-
+      else
        val = java_child.children[0].to_s
        ruby_child = Treat::Entities::Token.from_string(val)
        ruby_child.set :tag, tag
        ruby_node << ruby_child
      end
-
+      
    end
  end
-  
+
  def self.get_token_list(entity)
    list = StanfordCoreNLP::ArrayList.new
    entity.tokens.each do |token|
      list.add(StanfordCoreNLP::Word.new(token.to_s))
    end
    list
  end
 end
--- a/lib/treat/workers/processors/segmenters/punkt.rb
+++ b/lib/treat/workers/processors/segmenters/punkt.rb
@ -13,7 +13,7 @@ class Treat::Workers::Processors::Segmenters::Punkt
  silence_warnings { require 'punkt-segmenter' }
  # Require the YAML parser.
-  silence_warnings { require 'psych' }
+  # silence_warnings { require 'psych' }
  # Hold one copy of the segmenter per language.
  @@segmenters = {}
@ -87,7 +87,7 @@ class Treat::Workers::Processors::Segmenters::Punkt
      end
    end
-    t = ::Psych.load(File.read(model))
+    t = ::YAML.load(File.read(model))
    @@segmenters[lang] =
    ::Punkt::SentenceTokenizer.new(t)
--- a/Show More
+++ b/Show More
`@ -1,2 +1,2 @@`
	`--format s -c`	`--format d -c`
	`--order rand`	`--order rand`