Compare commits
No commits in common. "master" and "mongo-data-sets" have entirely different histories.
@@ -12,6 +12,4 @@
 *.html
 *.yaml
 spec/sandbox.rb
-coverage/*
-benchmark/*
 TODO
.travis.yml (15 changed lines)
@@ -1,18 +1,11 @@
 language: ruby
-
 rvm:
 - 1.9.2
 - 1.9.3
-- 2.0
-- 2.1
-- 2.2
-
 before_install:
-- export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk-i386/"
-
-before_script:
+- export "JAVA_HOME=/usr/lib/jvm/java-6-openjdk/"
+before_script:
 - sudo apt-get install antiword
 - sudo apt-get install poppler-utils
-- rake treat:install[travis] --trace
-
-script: rake treat:spec --trace
+- rake treat:install[travis]
+script: rake treat:spec
.treat (35 changed lines)
@@ -1,35 +0,0 @@
# A boolean value indicating whether to silence
# the output of external libraries (e.g. Stanford
# tools, Enju, LDA, Ruby-FANN, Schiphol).
Treat.core.verbosity.silence = false

# A boolean value indicating whether to explain
# the steps that Treat is performing.
Treat.core.verbosity.debug = true

# A boolean value indicating whether Treat should
# try to detect the language of newly input text.
Treat.core.language.detect = false

# A string representing the language to default
# to when detection is off.
Treat.core.language.default = 'english'

# A symbol representing the finest level at which
# language detection should be performed if language
# detection is turned on.
Treat.core.language.detect_at = :document

# The directory containing executables and JAR files.
Treat.paths.bin = '##_INSTALLER_BIN_PATH_##'

# The directory containing trained models
Treat.paths.models = '##_INSTALLER_MODELS_PATH_##'

# Mongo database configuration.
Treat.databases.mongo.db = 'your_database'
Treat.databases.mongo.host = 'localhost'
Treat.databases.mongo.port = '27017'

# Include the DSL by default.
include Treat::Core::DSL
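The same options removed above can also be set at runtime through the struct-backed accessors (Treat.core, Treat.paths, Treat.databases). The following is a minimal sketch only; the values shown are illustrative and not taken from either branch:

require 'treat'

# Mirror a few of the .treat options programmatically; these accessor chains
# are the ones used in the .treat file above (values here are assumptions).
Treat.core.verbosity.silence = true
Treat.core.language.default  = 'english'
Treat.databases.mongo.db     = 'treat_test'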
Gemfile (39 changed lines)
@@ -1,45 +1,12 @@
-source 'https://rubygems.org'
+source :rubygems

 gemspec

 gem 'birch'
 gem 'schiphol'
-gem 'yomu'
-gem 'ruby-readability'
-gem 'nokogiri'
+gem 'sourcify'

 group :test do
 gem 'rspec'
 gem 'rake'
-gem 'terminal-table'
-gem 'simplecov'
-end
+end

-=begin
-gem 'linguistics'
-gem 'engtagger'
-gem 'open-nlp'
-gem 'stanford-core-nlp'
-gem 'rwordnet'
-gem 'scalpel'
-gem 'fastimage'
-gem 'decisiontree'
-gem 'whatlanguage'
-gem 'zip'
-gem 'nickel'
-gem 'tactful_tokenizer'
-gem 'srx-english'
-gem 'punkt-segmenter'
-gem 'chronic'
-gem 'uea-stemmer'
-gem 'rbtagger'
-gem 'ruby-stemmer'
-gem 'activesupport'
-gem 'rb-libsvm'
-gem 'tomz-liblinear-ruby-swig'
-gem 'ruby-fann'
-gem 'fuzzy-string-match'
-gem 'levenshtein-ffi'
-gem 'tf-idf-similarity'
-gem 'kronic'
-=end
LICENSE (4 changed lines)
@@ -1,4 +1,4 @@
-Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 2.0.0
+Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.2

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -15,7 +15,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.

 Author: Louis-Antoine Mullie (louis.mullie@gmail.com). Copyright 2011-12.

-A non-trivial amount of code has been incorporated and modified from other libraries:
+Non-trivial amount of code has been incorporated and modified from other libraries:

 - formatters/readers/odt.rb - Mark Watson (GPL license)
 - processors/tokenizers/tactful.rb - Matthew Bunday (GPL license)
README.md (51 changed lines)
@@ -1,43 +1,34 @@
 [](http://travis-ci.org/#!/louismullie/treat)
-[](https://codeclimate.com/github/louismullie/treat)
+[](https://gemnasium.com/louismullie/treat)

+Treat is a framework for natural language processing and computational linguistics in Ruby. It provides a common API for a number of gems and external libraries for document retrieval, parsing, annotation, and information extraction.

-
+**Current features**

-**New in v2.0.5: [OpenNLP integration](https://github.com/louismullie/treat/commit/727a307af0c64747619531c3aa355535edbf4632) and [Yomu support](https://github.com/louismullie/treat/commit/e483b764e4847e48b39e91a77af8a8baa1a1d056)**

-Treat is a toolkit for natural language processing and computational linguistics in Ruby. The Treat project aims to build a language- and algorithm- agnostic NLP framework for Ruby with support for tasks such as document retrieval, text chunking, segmentation and tokenization, natural language parsing, part-of-speech tagging, keyword extraction and named entity recognition. Learn more by taking a [quick tour](https://github.com/louismullie/treat/wiki/Quick-Tour) or by reading the [manual](https://github.com/louismullie/treat/wiki/Manual).

-**Features**

 * Text extractors for PDF, HTML, XML, Word, AbiWord, OpenOffice and image formats (Ocropus).
-* Text chunkers, sentence segmenters, tokenizers, and parsers (Stanford & Enju).
-* Lexical resources (WordNet interface, several POS taggers for English).
-* Language, date/time, topic words (LDA) and keyword (TF*IDF) extraction.
+* Text retrieval with indexation and full-text search (Ferret).
+* Text chunkers, sentence segmenters, tokenizers, and parsers for several languages (Stanford & Enju).
 * Word inflectors, including stemmers, conjugators, declensors, and number inflection.
-* Serialization of annotated entities to YAML, XML or to MongoDB.
+* Lexical resources (WordNet interface, several POS taggers for English, Stanford taggers for several languages).
+* Language, date/time, topic words (LDA) and keyword (TF*IDF) extraction.
+* Serialization of annotated entities to YAML, XML formats or to MongoDB.
 * Visualization in ASCII tree, directed graph (DOT) and tag-bracketed (standoff) formats.
 * Linguistic resources, including language detection and tag alignments for several treebanks.
-* Machine learning (decision tree, multilayer perceptron, LIBLINEAR, LIBSVM).
-* Text retrieval with indexation and full-text search (Ferret).
+* Decision tree and multilayer perceptron classification (liblinear coming soon!)

-**Contributing**
+<br>

-I am actively seeking developers that can help maintain and expand this project. You can find a list of ideas for contributing to the project [here](https://github.com/louismullie/treat/wiki/Contributing).
+**Resources**

-**Authors**
+* Read the [latest documentation](http://rubydoc.info/github/louismullie/treat/frames).
+* See how to [install Treat](https://github.com/louismullie/treat/wiki/Installation).
-Lead developper: @louismullie [[Twitter](https://twitter.com/LouisMullie)]
+* Learn how to [use Treat](https://github.com/louismullie/treat/wiki/Manual).
+* Help out by [contributing to the project](https://github.com/louismullie/treat/wiki/Contributing).
-Contributors:
+* View a list of [papers](https://github.com/louismullie/treat/wiki/Papers) about tools included in this toolkit.
+* Open an [issue](https://github.com/louismullie/treat/issues).
-- @bdigital
-- @automatedtendencies
-- @LeFnord
-- @darkphantum
-- @whistlerbrk
-- @smileart
-- @erol
+<br>

 **License**

 This software is released under the [GPL License](https://github.com/louismullie/treat/wiki/License-Information) and includes software released under the GPL, Ruby, Apache 2.0 and MIT licenses.
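Both versions of the README above refer to the DSL that the .treat file loads with include Treat::Core::DSL and that the sweeten! code later in this diff wires up as lowercase entity constructors. A hedged usage sketch follows; the sample text, the tokenize call and the exact return values are assumptions, not taken from the diff:

require 'treat'
include Treat::Core::DSL

# Lowercase constructors map to Treat::Entities::<Type>.build (see config.rb
# further down in this diff); 'sentence' and 'word' are among the entity types
# listed in the entities configuration.
s = sentence('Treat builds a tree of entities from plain text.')
s.tokenize   # invoke a tokenizer worker on the sentence (assumed API)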
RELEASE (14 changed lines)
@@ -41,16 +41,4 @@ Treat - Text Retrieval, Extraction and Annotation Toolkit
 1.1.0

 * Complete refactoring of the core of the library.
 * Separated all configuration stuff from dynamic stuff.
-
-1.2.0
-
-* Added LIBSVM and LIBLINEAR classifier support.
-* Added support for serialization of documents and data sets to MongoDB.
-* Added specs for most of the core classes.
-* Several bug fixes.
-
-2.0.0rc1
-
-* MAJOR CHANGE: the old DSL is no longer supported. A new DSL style using
-  lowercase keywords is now used and must be required explicitly.
Rakefile (58 changed lines)
@@ -1,47 +1,29 @@
-# All commands are prefixed with "treat:".
+require 'date'
+require 'rspec/core/rake_task'

+task :default => :spec

 namespace :treat do

-  # Require the Treat library.
-  require_relative 'lib/treat'
-  # Sandbox a script, for development.
-  # Syntax: rake treat:sandbox
-  task :sandbox do
-    require_relative 'spec/sandbox'
+  RSpec::Core::RakeTask.new do |t|
+    task = ARGV[0].scan(/\[([a-z]*)\]/)
+    if task && task.size == 0
+      t.pattern = "./spec/*.rb"
+    else
+      t.pattern = "./spec/#{task[0][0]}.rb"
+    end
   end

-  # Prints the current version of Treat.
-  # Syntax: rake treat:version
   task :version do
-    puts Treat::VERSION
+    vpath = '../lib/treat/version.rb'
+    vfile = File.expand_path(vpath, __FILE__)
+    contents = File.read(vfile)
+    puts contents[/VERSION = "([^"]+)"/, 1]
   end

-  # Installs a language pack (default to english).
-  # A language pack is a set of gems, binaries and
-  # model files that support the various workers
-  # that are available for that particular language.
-  # Syntax: rake treat:install (installs english)
-  # - OR -  rake treast:install[some_language]
   task :install, [:language] do |t, args|
-    language = args.language || 'english'
-    Treat::Core::Installer.install(language)
+    require './lib/treat'
+    Treat.install(args.language || 'english')
-  end
-
-  # Runs 1) the core library specs and 2) the
-  # worker specs for a) all languages (default)
-  # or b) a specific language (if specified).
-  # Also outputs the coverage for the whole
-  # library to treat/coverage (using SimpleCov).
-  # N.B. the worker specs are dynamically defined
-  # following the examples found in spec/workers.
-  # (see /spec/language/workers for more info)
-  # Syntax: rake treat:spec (core + all langs)
-  # - OR -  rake treat:spec[some_language]
-  task :spec, [:language] do |t, args|
-    require_relative 'spec/helper'
-    Treat::Specs::Helper.start_coverage
-    Treat::Specs::Helper.run_library_specs
-    Treat::Specs::Helper.run_language_specs(args.language)
   end

 end
lib/treat.rb (51 changed lines)
@@ -1,23 +1,36 @@
-# Treat is a toolkit for natural language
-# processing and computational linguistics
-# in Ruby. The Treat project aims to build
-# a language- and algorithm- agnostic NLP
-# framework for Ruby with support for tasks
-# such as document retrieval, text chunking,
-# segmentation and tokenization, natural
-# language parsing, part-of-speech tagging,
-# keyword mining and named entity recognition.
-#
-# Author: Louis-Antoine Mullie (c) 2010-12.
-#
-# Released under the General Public License.
 module Treat

-  # * Load all the core classes. * #
-  require_relative 'treat/version'
-  require_relative 'treat/exception'
-  require_relative 'treat/autoload'
-  require_relative 'treat/modules'
-  require_relative 'treat/builder'
+  # Treat requires Ruby >= 1.9.2
+  if RUBY_VERSION < '1.9.2'
+    raise "Treat requires Ruby version 1.9.2 " +
+    "or higher, but current is #{RUBY_VERSION}."
+  end

+  # Custom exception class.
+  class Exception < ::Exception; end

+  # Load configuration options.
+  require 'treat/config'
+  # Load all workers.
+  require 'treat/helpers'
+  # Require library loaders.
+  require 'treat/loaders'
+  # Require all core classes.
+  require 'treat/core'
+  # Require all entity classes.
+  require 'treat/entities'
+  # Lazy load worker classes.
+  require 'treat/workers'
+  # Require proxies last.
+  require 'treat/proxies'

+  # Turn sugar on.
+  Treat::Config.sweeten!

+  # Install packages for a given language.
+  def self.install(language = :english)
+    require 'treat/installer'
+    Treat::Installer.install(language)
+  end

 end
@@ -1,44 +0,0 @@
# Basic mixin for all the main modules;
# takes care of requiring the right files
# in the right order for each one.
#
# If a module's folder (e.g. /entities)
# contains a file with a corresponding
# singular name (e.g. /entity), that
# base class is required first. Then,
# all the files that are found directly
# under that folder are required (but
# not those found in sub-folders).
module Treat::Autoload

  # Loads all the files for the base
  # module in the appropriate order.
  def self.included(base)
    m = self.get_module_name(base)
    d = self.get_module_path(m)
    n = self.singularize(m) + '.rb'
    f, p = File.join(d, n), "#{d}/*.rb"
    require f if File.readable?(f)
    Dir.glob(p).each { |f| require f }
  end

  # Returns the path to a module's dir.
  def self.get_module_path(name)
    file = File.expand_path(__FILE__)
    dirs = File.dirname(file).split('/')
    File.join(*dirs[0..-1], name)
  end

  # Return the downcased form of the
  # module's last name (e.g. "entities").
  def self.get_module_name(mod)
    mod.to_s.split('::')[-1].downcase
  end

  # Helper method to singularize words.
  def self.singularize(w)
    if w[-3..-1] == 'ies'; w[0..-4] + 'y'
    else; (w[-1] == 's' ? w[0..-2] : w); end
  end

end
@@ -1,6 +0,0 @@
class Treat::Builder
  include Treat::Core::DSL
  def initialize(&block)
    instance_exec(&block)
  end
end
@@ -0,0 +1,135 @@
module Treat::Config

  Paths = [ :tmp, :lib, :bin,
    :files, :data, :models, :spec ]

  class << self
    attr_accessor :config
  end

  Treat.module_eval do
    # Handle all missing methods as conf options.
    def self.method_missing(sym, *args, &block)
      super(sym, *args, &block) if sym == :to_ary
      Treat::Config.config[sym]
    end
  end

  def self.configure
    # Temporary configuration hash.
    config = { paths: {} }
    confdir = get_full_path(:lib) + 'treat/config'
    # Iterate over each directory in the config.
    Dir[confdir + '/*'].each do |dir|
      name = File.basename(dir, '.*').intern
      config[name] = {}
      # Iterate over each file in the directory.
      Dir[confdir + "/#{name}/*.rb"].each do |file|
        key = File.basename(file, '.*').intern
        config[name][key] = eval(File.read(file))
      end
    end
    # Get the path config.
    Paths.each do |path|
      config[:paths][path] = get_full_path(path)
    end
    # Get the tag alignments.
    configure_tags!(config[:tags][:aligned])
    # Convert hash to structs.
    self.config = self.hash_to_struct(config)
  end

  def self.get_full_path(dir)
    File.dirname(__FILE__) +
    '/../../' + dir.to_s + "/"
  end

  def self.configure_tags!(config)
    ts = config[:tag_sets]
    config[:word_tags_to_category] =
    align_tags(config[:word_tags], ts)
    config[:phrase_tags_to_category] =
    align_tags(config[:phrase_tags], ts)
  end

  # Align tag configuration.
  def self.align_tags(tags, tag_sets)
    wttc = {}
    tags.each_slice(2) do |desc, tags|
      category = desc.gsub(',', ' ,').
      split(' ')[0].downcase
      tag_sets.each_with_index do |tag_set, i|
        next unless tags[i]
        wttc[tags[i]] ||= {}
        wttc[tags[i]][tag_set] = category
      end
    end
    wttc
  end

  def self.hash_to_struct(hash)
    return hash if hash.keys.
    select { |k| !k.is_a?(Symbol) }.size > 0
    struct = Struct.new(
    *hash.keys).new(*hash.values)
    hash.each do |key, value|
      if value.is_a?(Hash)
        struct[key] =
        self.hash_to_struct(value)
      end
    end
    struct
  end

  # Turn on syntactic sugar.
  def self.sweeten!

    # Undo this in unsweeten! - # Fix
    Treat::Entities.module_eval do
      self.constants.each do |type|
        define_singleton_method(type) do |value='', id=nil|
          const_get(type).build(value, id)
        end
      end
    end

    return if Treat.core.syntax.sweetened
    Treat.core.syntax.sweetened = true
    Treat.core.entities.list.each do |type|
      next if type == :Symbol
      kname = cc(type).intern
      klass = Treat::Entities.const_get(kname)
      Object.class_eval do
        define_method(kname) do |val, opts={}|
          klass.build(val, opts)
        end
      end
    end

    Treat::Core.constants.each do |kname|
      Object.class_eval do
        klass = Treat::Core.const_get(kname)
        define_method(kname) do |*args|
          klass.new(*args)
        end
      end
    end

  end

  # Turn off syntactic sugar.
  def self.unsweeten!
    return unless Treat.core.syntax.sweetened
    Treat.core.syntax.sweetened = false
    Treat.core.entities.list.each do |type|
      name = cc(type).intern
      next if type == :Symbol
      Object.class_eval { remove_method(name) }
    end
  end

  # Run all configuration.
  self.configure

end
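The hash_to_struct helper in the file above is what turns the nested option hashes (like the config data files later in this diff) into chained accessors such as Treat.core.language.default. The following is a standalone sketch of the same idea, independent of Treat; the method body is a simplified restatement, not the exact code above:

# Convert a nested symbol-keyed hash into nested Structs so that options can
# be read as a method chain (simplified restatement of hash_to_struct above).
def hash_to_struct(hash)
  return hash unless hash.is_a?(Hash) && hash.keys.all? { |k| k.is_a?(Symbol) }
  struct = Struct.new(*hash.keys).new(*hash.values)
  hash.each do |key, value|
    struct[key] = hash_to_struct(value) if value.is_a?(Hash)
  end
  struct
end

config = hash_to_struct(language: { default: :english, detect: false })
puts config.language.default   # => english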
@@ -1,38 +0,0 @@
# This module uses structs to represent the
# configuration options that are stored in
# the /config folder.
module Treat::Config

  # Require configurable mix in.
  require_relative 'importable'

  # Make all configuration importable.
  extend Treat::Config::Importable

  # Core configuration options for entities.
  class Treat::Config::Entities; end

  # Configuration for paths to models, binaries,
  # temporary storage and file downloads.
  class Treat::Config::Paths; end

  # Configuration for all Treat workers.
  class Treat::Config::Workers; end

  # Helpful linguistic options.
  class Treat::Config::Linguistics; end

  # Supported workers for each language.
  class Treat::Config::Languages; end

  # Configuration options for external libraries.
  class Treat::Config::Libraries; end

  # Configuration options for database
  # connectivity (host, port, etc.)
  class Treat::Config::Databases; end

  # Configuration options for Treat core.
  class Treat::Config::Core; end

end
@@ -1,51 +0,0 @@
# Provide default functionality to load configuration
# options from flat files into their respective modules.
module Treat::Config::Configurable

  # When extended, add the .config property to
  # the class that is being operated on.
  def self.extended(base)
    class << base; attr_accessor :config; end
    base.class_eval { self.config = {} }
  end

  # Provide base functionality to configure
  # all modules. The behaviour is as follows:
  #
  # 1 - Check if a file named data/$CLASS$.rb
  # exists; if so, load that file as the base
  # configuration, i.e. "Treat.$CLASS$"; e.g.
  # "Treat.core"
  #
  # 2 - Check if a folder named data/$CLASS$
  # exists; if so, load each file in that folder
  # as a suboption of the main configuration,
  # i.e. "Treat.$CLASS$.$FILE$"; e.g. "Treat.workers"
  #
  # (where $CLASS$ is the lowercase name of
  # the concrete class being extended by this.)
  def configure!
    path = File.dirname(File.expand_path( # FIXME
    __FILE__)).split('/')[0..-4].join('/') + '/'
    main_dir = path + 'lib/treat/config/data/'
    mod_name = self.name.split('::')[-1].downcase
    conf_dir = main_dir + mod_name
    base_file = main_dir + mod_name + '.rb'
    if File.readable?(base_file)
      self.config = eval(File.read(base_file))
    elsif FileTest.directory?(conf_dir)
      self.config = self.from_dir(conf_dir)
    else; raise Treat::Exception,
      "No config file found for #{mod_name}."
    end
  end

  # * Helper methods for configuraton * #
  def from_dir(conf_dir)
    Hash[Dir[conf_dir + '/*'].map do |path|
      name = File.basename(path, '.*').intern
      [name, eval(File.read(path))]
    end]
  end

end
@@ -0,0 +1,4 @@
['xml', 'html', 'txt', 'odt',
'abw', 'doc', 'yaml', 'uea',
'lda', 'pdf', 'ptb', 'dot',
'ai', 'id3', 'svo', 'mlp' ]

@@ -0,0 +1,8 @@
{language_to_code: {
arabic: 'UTF-8',
chinese: 'GB18030',
english: 'UTF-8',
french: 'ISO_8859-1',
ferman: 'ISO_8859-1',
hebrew: 'UTF-8'
}}

@@ -0,0 +1,2 @@
{list: [:entity, :unknown, :email, :url, :symbol, :sentence, :punctuation, :number, :enclitic, :word, :token, :fragment, :phrase, :paragraph, :title, :zone, :list, :block, :page, :section, :collection, :document],
order: [:token, :fragment, :phrase, :sentence, :zone, :section, :document, :collection]}

@@ -0,0 +1,3 @@
{default: :english,
detect: false,
detect_at: :document}

@@ -0,0 +1,8 @@
{description: {
:tmp => 'temporary files',
:lib => 'class and module definitions',
:bin => 'binary files',
:files => 'user-saved files',
:models => 'model files',
:spec => 'spec test files'
}}

@@ -0,0 +1 @@
{sweetened: false}

@@ -0,0 +1 @@
{debug: false, silence: true}
@@ -1,54 +0,0 @@
{
  acronyms:
    ['xml', 'html', 'txt', 'odt',
     'abw', 'doc', 'yaml', 'uea',
     'lda', 'pdf', 'ptb', 'dot',
     'ai', 'id3', 'svo', 'mlp',
     'svm', 'srx', 'nlp'],

  encodings:
    {language_to_code: {
      arabic: 'UTF-8',
      chinese: 'GB18030',
      english: 'UTF-8',
      french: 'ISO_8859-1',
      ferman: 'ISO_8859-1',
      hebrew: 'UTF-8'
    }},

  entities:
    {list:
      [:entity, :unknown, :email,
       :url, :symbol, :sentence,
       :punctuation, :number,
       :enclitic, :word, :token, :group,
       :fragment, :phrase, :paragraph,
       :title, :zone, :list, :block,
       :page, :section, :collection,
       :document],
     order:
      [:token, :fragment, :group,
       :sentence, :zone, :section,
       :document, :collection]},

  language: {
    default: :english,
    detect: false,
    detect_at: :document
  },

  paths: {
    description: {
      tmp: 'temporary files',
      lib: 'class and module definitions',
      bin: 'binary files',
      files: 'user-saved files',
      models: 'model files',
      spec: 'spec test files'
    }
  },

  learning: {
    list: [:data_set, :export, :feature, :tag, :problem, :question]
  },

  syntax: { sweetened: false },

  verbosity: { debug: false, silence: true}
}
@@ -1,10 +0,0 @@
{
  default: {
    adapter: :mongo
  },
  mongo: {
    host: 'localhost',
    port: '27017',
    db: nil
  }
}
@@ -1,15 +0,0 @@
{
  list:
    [:entity, :unknown, :email,
     :url, :symbol, :sentence,
     :punctuation, :number,
     :enclitic, :word, :token,
     :fragment, :phrase, :paragraph,
     :title, :zone, :list, :block,
     :page, :section, :collection,
     :document],
  order:
    [:token, :fragment, :phrase,
     :sentence, :zone, :section,
     :document, :collection]
}
@@ -1,33 +0,0 @@
{
  dependencies: [
    'ferret', 'bson_ext', 'mongo', 'lda-ruby',
    'stanford-core-nlp', 'linguistics',
    'ruby-readability', 'whatlanguage',
    'chronic', 'kronic', 'nickel', 'decisiontree',
    'rb-libsvm', 'ruby-fann', 'zip', 'loggability',
    'tf-idf-similarity', 'narray', 'fastimage',
    'fuzzy-string-match', 'levenshtein-ffi'
  ],
  workers: {
    learners: {
      classifiers: [:id3, :linear, :mlp, :svm]
    },
    extractors: {
      keywords: [:tf_idf],
      language: [:what_language],
      topic_words: [:lda],
      tf_idf: [:native],
      distance: [:levenshtein],
      similarity: [:jaro_winkler, :tf_idf]
    },
    formatters: {
      serializers: [:xml, :yaml, :mongo],
      unserializers: [:xml, :yaml, :mongo],
      visualizers: [:dot, :standoff, :tree]
    },
    retrievers: {
      searchers: [:ferret],
      indexers: [:ferret]
    }
  }
}
@@ -1,95 +0,0 @@
{
  dependencies: [
    'rbtagger', 'ruby-stemmer', 'punkt-segmenter', 'tactful_tokenizer', 'nickel', 'rwordnet',
    'uea-stemmer', 'engtagger', 'activesupport', 'srx-english', 'scalpel'
  ],
  workers: {
    extractors: {
      time: [:chronic, :kronic, :ruby, :nickel],
      topics: [:reuters],
      name_tag: [:stanford]
    },
    inflectors: {
      conjugators: [:linguistics],
      declensors: [:english, :linguistics],
      stemmers: [:porter, :porter_c, :uea],
      ordinalizers: [:linguistics],
      cardinalizers: [:linguistics]
    },
    lexicalizers: {
      taggers: [:lingua, :brill, :stanford],
      sensers: [:wordnet],
      categorizers: [:from_tag]
    },
    processors: {
      parsers: [:stanford],
      segmenters: [:scalpel, :srx, :tactful, :punkt, :stanford],
      tokenizers: [:ptb, :stanford, :punkt, :open_nlp]
    }
  },
  stop_words: [
    "about", "also", "are", "away", "because", "been", "beside", "besides", "between", "but",
    "cannot", "could", "did", "etc", "even", "ever", "every", "for", "had", "have",
    "how", "into", "isn", "maybe", "non", "nor", "now", "should", "such", "than",
    "that", "then", "these", "this", "those", "though", "too", "was", "wasn", "were",
    "what", "when", "where", "which", "while", "who", "whom", "whose", "will", "with",
    "would", "wouldn", "yes"
  ]
}
@@ -1,148 +0,0 @@
{
  dependencies: [
    'punkt-segmenter', 'tactful_tokenizer', 'stanford-core-nlp'
  ],
  workers: {
    processors: {
      segmenters: [:scalpel],
      tokenizers: [:ptb,:stanford],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
  },
  stop_words: [
    "ailleurs", "ainsi", "alors", "aucun", "aucune", "auquel", "aurai", "auras", "aurez", "aurons",
    "auront", "aussi", "autre", "autres", "aux", "auxquelles", "auxquels", "avaient", "avais", "avait",
    "avec", "avez", "aviez", "avoir", "avons", "celui", "cependant", "certaine", "certaines", "certains",
    "ces", "cet", "cette", "ceux", "chacun", "chacune", "chaque", "comme", "constamment", "davantage",
    "depuis", "des", "desquelles", "desquels", "dessous", "dessus", "donc", "dont", "duquel", "egalement",
    "elles", "encore", "enfin", "ensuite", "etaient", "etais", "etait", "etes", "etiez", "etions",
    "etre", "eux", "guere", "ici", "ils", "jamais", "jusqu", "laquelle", "legerement", "lequel",
    "les", "lesquelles", "lesquels", "leur", "leurs", "lors", "lui", "maintenant", "mais", "malgre",
    "moi", "moins", "notamment", "parce", "plupart", "pourtant", "presentement", "presque", "puis", "puisque",
    "quand", "quant", "que", "quel", "quelqu", "quelque", "quelques", "qui", "quoi", "quoique",
    "rien", "selon", "serai", "seras", "serez", "serons", "seront", "soient", "soit", "sommes",
    "sont", "sous", "suis", "telle", "telles", "tels", "toi", "toujours", "tout", "toutes",
    "tres", "trop", "une", "vos", "votre", "vous"
  ]
}
@@ -1,137 +0,0 @@
#encoding: UTF-8

{
  dependencies: [
    'punkt-segmenter', 'tactful_tokenizer', 'stanford-core-nlp'
  ],
  workers: {
    processors: {
      segmenters: [:tactful, :punkt, :stanford, :scalpel],
      tokenizers: [:stanford, :punkt],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
  },
  stop_words: [
    "alle", "allem", "alles", "andere", "anderem", "anderen", "anderer", "anderes", "auf", "bei",
    "beim", "bist", "dadurch", "dein", "deine", "deiner", "deines", "deins", "dem", "denen",
    "der", "deren", "des", "deshalb", "dessen", "diese", "diesem", "diesen", "dieser", "dieses",
    "ein", "eine", "einem", "einen", "einer", "eines", "euer", "euere", "eueren", "eueres",
    "für", "haben", "habt", "hatte", "hatten", "hattest", "hattet", "hierzu", "hinter", "ich",
    "ihr", "ihre", "ihren", "ihrer", "ihres", "indem", "ist", "jede", "jedem", "jeden",
    "jeder", "jedes", "kann", "kannst", "können", "könnt", "konnte", "konnten", "konntest", "konntet",
    "mehr", "mein", "meine", "meiner", "meines", "meins", "nach", "neben", "nicht", "nichts",
    "seid", "sein", "seine", "seiner", "seines", "seins", "sie", "sind", "über", "und",
    "uns", "unser", "unsere", "unter", "vor", "warst", "weil", "wenn", "werde", "werden",
    "werdet", "willst", "wir", "wird", "wirst", "wollen", "wollt", "wollte", "wollten", "wolltest",
    "wolltet", "zum", "zur"
  ]
}
@@ -1,162 +0,0 @@
{
  dependencies: [
    'punkt-segmenter', 'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: []
    }
  },
  stop_words: [
    "affinche", "alcun", "alcuna", "alcune", "alcuni", "alcuno", "allora", "altra", "altre", "altri",
    "altro", "anziche", "certa", "certe", "certi", "certo", "che", "chi", "chiunque", "comunque",
    "con", "cosa", "cose", "cui", "dagli", "dai", "dall", "dalla", "dalle", "darsi",
    "degli", "del", "dell", "della", "delle", "dello", "dunque", "egli", "eppure", "esse",
    "essi", "forse", "gia", "infatti", "inoltre", "invece", "lui", "malgrado", "mediante", "meno",
    "mentre", "mie", "miei", "mio", "modo", "molta", "molte", "molti", "molto", "negli",
    "nel", "nella", "nelle", "nessun", "nessuna", "nessuno", "niente", "noi", "nostra", "nostre",
    "nostri", "nostro", "nulla", "occorre", "ogni", "ognuno", "oltre", "oltretutto", "oppure", "ovunque",
    "ovvio", "percio", "pertanto", "piu", "piuttosto", "poca", "poco", "poiche", "propri", "proprie",
    "proprio", "puo", "qua", "qual", "qualche", "qualcuna", "qualcuno", "quale", "quali", "qualunque",
    "quando", "quant", "quante", "quanti", "quanto", "quantunque", "quegli", "quei", "quest", "questa",
    "queste", "questi", "questo", "qui", "quindi", "sebbene", "sembra", "sempre", "senza", "soltanto",
    "stessa", "stesse", "stessi", "stesso", "sugli", "sui", "sul", "sull", "sulla", "sulle",
    "suo", "suoi", "taluni", "taluno", "tanta", "tanti", "tanto", "tra", "tuo", "tuoi",
    "tutt", "tutta", "tutte", "tutto", "una", "uno", "voi"
  ]
}
@@ -1,11 +0,0 @@
{
  dependencies: [
    'punkt-segmenter', 'srx-polish'
  ],
  workers: {
    processors: {
      segmenters: [:srx, :punkt]
    }
  }
}
@@ -1,291 +0,0 @@
{
  dependencies: [
    'punkt-segmenter', 'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: []
    }
  },
  stop_words: [
    "abans", "aca", "acerca", "ahora", "aixo", "algo", "algu", "alguien", "algun", "alguna",
    "algunas", "algunes", "alguno", "algunos", "alguns", "alla", "alli", "allo", "altra", "altre",
    "altres", "amb", "amunt", "antes", "aquel", "aquell", "aquella", "aquellas", "aquelles", "aquellos",
    "aquells", "aquest", "aquesta", "aquestes", "aquests", "aqui", "asimismo", "aun", "aunque", "avall",
    "cada", "casi", "com", "como", "con", "cosas", "coses", "cual", "cuales", "cualquier",
    "cuando", "damunt", "darrera", "davant", "debe", "deben", "deber", "debia", "debian", "decia",
    "decian", "decir", "deia", "deien", "del", "demasiado", "des", "desde", "despues", "dicen",
    "diciendo", "dins", "dir", "diu", "diuen", "doncs", "ell", "ellas", "elles", "ells",
    "els", "encara", "entonces", "ese", "esos", "esser", "esta", "estan", "estando", "estant",
    "estar", "estaria", "estarian", "estarien", "estas", "estos", "farien", "feia", "feien", "fent",
    "fue", "fueron", "gaire", "gairebe", "hace", "hacia", "hacian", "haciendo", "haran", "hauria",
    "haurien", "hemos", "hola", "junto", "lejos", "les", "lloc", "los", "menos", "menys",
    "meva", "mias", "mio", "misma", "mismas", "mismo", "mismos", "molt", "molta", "moltes",
    "mon", "mucha", "mucho", "muy", "nadie", "ningu", "nomes", "nosaltres", "nosotros", "nostra",
    "nostre", "nuestra", "nuestras", "nuestro", "nuestros", "nunca", "otra", "pasa", "pasan", "pasara",
    "pasaria", "passara", "passaria", "passen", "perque", "poc", "pocas", "pocos", "podem", "poden",
    "podeu", "podria", "podrian", "podrien", "poques", "porque", "potser", "puc", "pudieron", "pudo",
    "puede", "pueden", "puesto", "qualsevol", "quan", "que", "queria", "querian", "qui", "quien",
    "quienes", "quiere", "quieren", "quin", "quina", "quines", "quins", "quizas", "segueent", "segun",
    "sempre", "seran", "seria", "serian", "seu", "seva", "sido", "siempre", "siendo", "siguiente",
    "sino", "sobretodo", "solamente", "sovint", "suya", "suyas", "suyo", "suyos", "tambe", "tambien",
    "tanmateix", "tanta", "tanto", "tendran", "tendria", "tendrian", "tenen", "teu", "teva", "tiene",
    "tienen", "tindran", "tindria", "tindrien", "toda", "todavia", "todo", "tota", "totes", "tras",
    "traves", "tuvieron", "tuvo", "tuya", "tuyas", "tuyo", "tuyos", "unas", "unes", "unos",
    "uns", "usaba", "usaban", "usada", "usades", "usado", "usan", "usando", "usant", "usar",
    "usat", "usava", "usaven", "usen", "vaig", "varem", "varen", "vareu", "vegada", "vegades",
    "vez", "volem", "volen", "voleu", "vora", "vos", "vosaltres", "vosotros", "vostra", "vostre",
    "voy", "vuestra", "vuestras", "vuestro", "vuestros", "vull"
  ]
}
@@ -1,289 +0,0 @@
{
  dependencies: [
    'punkt-segmenter', 'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: []
    }
  },
  stop_words: [
    "atminstone", "an", "anda", "aven", "aldrig", "alla", "alls", "allt", "alltid", "allting",
    "alltsa", "andra", "annan", "annars", "antingen", "att", "bakom", "bland", "blev", "bli",
    "bliva", "blivit", "bort", "bortom", "bredvid", "dar", "darav", "darefter", "darfor", "dari",
    "darigenom", "darvid", "dedar", "definitivt", "del", "den", "dendar", "denhar", "denna", "deras",
    "dessa", "dessutom", "desto", "det", "detta", "dylik", "efterat", "efter", "eftersom", "eller",
    "emellertid", "enbart", "endast", "enligt", "ens", "ensam", "envar", "eran", "etc", "ett",
    "exakt", "fatt", "fastan", "fick", "fler", "flera", "foljande", "foljde", "foljer", "for",
    "fore", "forhoppningsvis", "formodligen", "forr", "forra", "forutom", "forvisso", "fran", "framfor", "fullstandigt",
    "gang", "gar", "gatt", "ganska", "gav", "genom", "genomgaende", "ger", "gick", "gjorde",
    "gjort", "gor", "hade", "har", "harav", "har", "hej", "hela", "helst", "helt",
    "hitta", "hon", "honom", "hur", "huruvida", "huvudsakligen", "ibland", "icke", "ickedestomindre", "igen",
    "ihop", "inat", "ingen", "ingenstans", "inget", "innan", "innehalla", "inre", "inte", "inuti",
    "istaellet", "kanske", "klart", "knappast", "knappt", "kom", "komma", "kommer", "kraver", "kunde",
    "kunna", "lata", "later", "lagga", "langre", "laet", "lagd", "leta", "letar", "manga",
    "maste", "med", "medan", "medans", "mellan", "mest", "min", "mindre", "minst", "mittemellan",
    "motsvarande", "mycket", "nagon", "nagongang", "nagonsin", "nagonstans", "nagonting", "nagorlunda", "nagot", "namligen",
    "nar", "nara", "nasta", "nastan", "nedat", "nedanfor", "nerat", "ner", "nog", "normalt",
    "nummer", "nuvarande", "nytt", "oavsett", "och", "ocksa", "oppna", "over", "overallt", "ofta",
    "okej", "olika", "ovanfor", "ratt", "redan", "relativt", "respektive", "rimlig", "rimligen", "rimligt",
    "salunda", "savida", "saga", "sager", "sakert", "sand", "sarskilt", "satt", "sak", "samma",
    "samtliga", "sedd", "senare", "senaste", "ser", "sig", "sista", "sjaelv", "ska", "skall",
    "skickad", "skriva", "skulle", "snabb", "snarare", "snart", "som", "somliga", "speciellt", "stalla",
    "stallet", "starta", "strax", "stundom", "tackar", "tanka", "taga", "tagen", "tala", "tanke",
    "tidigare", "tills", "tog", "totalt", "trolig", "troligen", "tvaers", "tvars", "tycka", "tyckte",
    "tyvarr", "understundom", "upp", "uppenbarligen", "uppenbart", "utan", "utanfor", "uteslutande", "utom", "var",
    "varan", "vad", "val", "varde", "vanlig", "vanligen", "var", "vare", "varenda", "varfor",
    "varifran", "varit", "varje", "varken", "vars", "vart", "vem", "verkligen", "vidare", "vilken",
    "vill", "visar", "visst", "visste"
  ]
}
@@ -1,16 +0,0 @@
{
  punkt: {
    model_path: nil
  },
  reuters: {
    model_path: nil
  },
  stanford: {
    jar_path: nil,
    model_path: nil
  },
  open_nlp: {
    jar_path: nil,
    model_path: nil
  }
}
@@ -1,44 +0,0 @@
{
  categories:
    ['adjective', 'adverb', 'noun',
     'verb', 'interjection', 'clitic',
     'coverb', 'conjunction', 'determiner',
     'particle', 'preposition', 'pronoun',
     'number', 'symbol', 'punctuation',
     'complementizer'],

  punctuation: {
    punct_to_category: {
      '.' => 'period', ',' => 'comma', ';' => 'semicolon', ':' => 'colon',
      '?' => 'interrogation', '!' => 'exclamation', '"' => 'double_quote', "'" => 'single_quote',
      '$' => 'dollar', '%' => 'percent', '#' => 'hash', '*' => 'asterisk',
      '&' => 'ampersand', '+' => 'plus', '-' => 'dash', '/' => 'slash',
      '\\' => 'backslash', '^' => 'caret', '_' => 'underscore', '`' => 'tick',
      '|' => 'pipe', '~' => 'tilde', '@' => 'at', '[' => 'bracket',
      ']' => 'bracket', '{' => 'brace', '}' => 'brace', '(' => 'parenthesis',
      ')' => 'parenthesis', '<' => 'tag', '>' => 'tag'
  }}
}
@@ -1,328 +0,0 @@
{
  aligned: {
    tag_sets: [
      :claws_c5, :brown, :penn,
      :stutgart, :chinese, :paris7
    ],
    phrase_tags: [
      'Adjectival phrase', ['', '', 'ADJP', '', '', 'AP'],
      'Adverbial phrase', ['', '', 'ADVP', '', '', 'AdP'],
      'Conjunction phrase', ['', '', 'CONJP', '', '', 'Ssub'],
      'Fragment', ['', '', 'FRAG', '', '', ''],
      'Interjectional phrase', ['', '', 'INTJ', '', '', ''],
      'List marker', ['', '', 'LST', '', '', ''],
      'Not a phrase', ['', '', 'NAC', '', '', ''],
      'Noun phrase', ['', '', 'NP', '', '', 'NP'],
      'Verbal nucleus', ['', '', '', '', '', 'VN'],
      'Head of noun phrase', ['', '', 'NX', '', '', ''],
      'Prepositional phrase', ['', '', 'PP', '', '', 'PP'],
      'Parenthetical', ['', '', 'PRN', '', '', ''],
      'Particle', ['', '', 'PRT', '', '', ''],
      'Participial phrase', ['', '', '', '', '', 'VPart'],
      'Quantifier phrase', ['', '', 'QP', '', '', ''],
      'Relative clause', ['', '', 'RRC', '', '', 'Srel'],
      'Coordinated phrase', ['', '', 'UCP', '', '', 'COORD'],
      'Infinitival phrase', ['', '', '', '', '', 'VPinf'],
      'Verb phrase', ['', '', 'VP', '', '', ''],
      'Inverted yes/no question', ['', '', 'SQ', '', '', ''],
      'Wh adjective phrase', ['', '', 'WHADJP', '', '', ''],
      'Wh adverb phrase', ['', '', 'WHADVP', '', '', ''],
      'Wh noun phrase', ['', '', 'WHNP', '', '', ''],
      'Wh prepositional phrase', ['', '', 'WHPP', '', '', ''],
      'Unknown', ['', '', 'X', '', '', ''],
      'Phrase', ['', '', 'P', '', '', 'Sint'],
      'Sentence', ['', '', 'S', '', '', 'SENT'],
      'Phrase', ['', '', 'SBAR', '', '', ''] # Fix
    ],
    word_tags: [

      # Aligned tags for the Claws C5, Brown and Penn tag sets.
      'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'A'],
      'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'ADJ'],
      'Ajective, adverbial or predicative', ['', '', '', 'ADJD', '', 'ADJ'],
      'Adjective, attribute', ['', '', '', 'ADJA', 'VA', 'ADJ'],
      'Adjective, ordinal number', ['ORD', 'OD', 'JJ', '', 'OD', 'ADJ'],
      'Adjective, comparative', ['AJC', 'JJR', 'JJR', 'KOKOM', '', 'ADJ'],
      'Adjective, superlative', ['AJS', 'JJT', 'JJS', '', 'JJ', 'ADJ'],
      'Adjective, superlative, semantically', ['AJ0', 'JJS', 'JJ', '', '', 'ADJ'],
      'Adjective, cardinal number', ['CRD', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],
      'Adjective, cardinal number, one', ['PNI', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],

      'Adverb', ['AV0', 'RB', 'RB', 'ADV', 'AD', 'ADV'],
      'Adverb, negative', ['XX0', '*', 'RB', 'PTKNEG', '', 'ADV'],
      'Adverb, comparative', ['AV0', 'RBR', 'RBR', '', 'AD', 'ADV'],
      'Adverb, superlative', ['AV0', 'RBT', 'RBS', '', 'AD', 'ADV'],
      'Adverb, particle', ['AVP', 'RP', 'RP', '', '', 'ADV'],
      'Adverb, question', ['AVQ', 'WRB', 'WRB', '', 'AD', 'ADV'],
      'Adverb, degree & question', ['AVQ', 'WQL', 'WRB', '', 'ADV'],
      'Adverb, degree', ['AV0', 'QL', 'RB', '', '', 'ADV'],
      'Adverb, degree, postposed', ['AV0', 'QLP', 'RB', '', '', 'ADV'],
      'Adverb, nominal', ['AV0', 'RN', 'RB', 'PROP', '', 'ADV'],
      'Adverb, pronominal', ['', '', '', '', 'PROP', '', 'ADV'],

      'Conjunction, coordination', ['CJC', 'CC', 'CC', 'KON', 'CC', 'COOD'],
      'Conjunction, coordination, and', ['CJC', 'CC', 'CC', 'KON', 'CC', 'ET'],
      'Conjunction, subordination', ['CJS', 'CS', 'IN', 'KOUS', 'CS', 'CONJ'],
      'Conjunction, subordination with to and infinitive', ['', '', '', 'KOUI', '', ''],
      'Conjunction, complementizer, that', ['CJT', 'CS', 'IN', '', '', 'C'],

      'Determiner', ['DT0', 'DT', 'DT', '', 'DT', 'D'],
      'Determiner, pronoun', ['DT0', 'DTI', 'DT', '', '', 'D'],
      'Determiner, pronoun, plural', ['DT0', 'DTS', 'DT', '', '', 'D'],
      'Determiner, prequalifier', ['DT0', 'ABL', 'DT', '', '', 'D'],
      'Determiner, prequantifier', ['DT0', 'ABN', 'PDT', '', 'DT', 'D'],
      'Determiner, pronoun or double conjunction', ['DT0', 'ABX', 'PDT', '', '', 'D'],
      'Determiner, pronoun or double conjunction', ['DT0', 'DTX', 'DT', '', '', 'D'],
      'Determiner, article', ['AT0', 'AT', 'DT', 'ART', '', 'D'],
      'Determiner, postdeterminer', ['DT0', 'AP', 'DT', '', '', 'D'],
      'Determiner, possessive', ['DPS', 'PP$', 'PRP$', '', '', 'D'],
      'Determiner, possessive, second', ['DPS', 'PP$', 'PRPS', '', '', 'D'],
      'Determiner, question', ['DTQ', 'WDT', 'WDT', '', 'DT', 'D'],
      'Determiner, possessive & question', ['DTQ', 'WP$', 'WP$', '', '', 'D'],
      'Interjection', ['', '', '', '', '', 'I'],
      'Localizer', ['', '', '', '', 'LC',
|
|
||||||
|
|
||||||
'Measure word', ['', '', '', '', 'M'],
|
|
||||||
|
|
||||||
'Noun, common', ['NN0', 'NN', 'NN', 'N', 'NN', 'NN'],
|
|
||||||
'Noun, singular', ['NN1', 'NN', 'NN', 'NN', 'NN', 'N'],
|
|
||||||
'Noun, plural', ['NN2', 'NNS', 'NNS', 'NN', 'NN', 'N'],
|
|
||||||
'Noun, proper, singular', ['NP0', 'NP', 'NNP', 'NE', 'NR', 'N'],
|
|
||||||
'Noun, proper, plural', ['NP0', 'NPS', 'NNPS', 'NE', 'NR', 'N'],
|
|
||||||
'Noun, adverbial', ['NN0', 'NR', 'NN', 'NE', '', 'N'],
|
|
||||||
'Noun, adverbial, plural', ['NN2', 'NRS', 'NNS', '', 'N'],
|
|
||||||
'Noun, temporal', ['', '', '', '', 'NT', 'N'],
|
|
||||||
'Noun, verbal', ['', '', '', '', 'NN', 'N'],
|
|
||||||
|
|
||||||
'Pronoun, nominal (indefinite)', ['PNI', 'PN', 'PRP', '', 'PN', 'CL'],
|
|
||||||
'Pronoun, personal, subject', ['PNP', 'PPSS', 'PRP', 'PPER'],
|
|
||||||
'Pronoun, personal, subject, 3SG', ['PNP', 'PPS', 'PRP', 'PPER'],
|
|
||||||
'Pronoun, personal, object', ['PNP', 'PPO', 'PRP', 'PPER'],
|
|
||||||
'Pronoun, reflexive', ['PNX', 'PPL', 'PRP', 'PRF'],
|
|
||||||
'Pronoun, reflexive, plural', ['PNX', 'PPLS', 'PRP', 'PRF'],
|
|
||||||
'Pronoun, question, subject', ['PNQ', 'WPS', 'WP', 'PWAV'],
|
|
||||||
'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'], # FIXME
|
|
||||||
'Pronoun, question, object', ['PNQ', 'WPO', 'WP', 'PWAV', 'PWAT'],
|
|
||||||
'Pronoun, existential there', ['EX0', 'EX', 'EX'],
|
|
||||||
'Pronoun, attributive demonstrative', ['', '', '', 'PDAT'],
|
|
||||||
'Prounoun, attributive indefinite without determiner', ['', '', '', 'PIAT'],
|
|
||||||
'Pronoun, attributive possessive', ['', '', '', 'PPOSAT', ''],
|
|
||||||
'Pronoun, substituting demonstrative', ['', '', '', 'PDS'],
|
|
||||||
'Pronoun, substituting possessive', ['', '', '', 'PPOSS', ''],
|
|
||||||
'Prounoun, substituting indefinite', ['', '', '', 'PIS'],
|
|
||||||
'Pronoun, attributive relative', ['', '', '', 'PRELAT', ''],
|
|
||||||
'Pronoun, substituting relative', ['', '', '', 'PRELS', ''],
|
|
||||||
'Pronoun, attributive interrogative', ['', '', '', 'PWAT'],
|
|
||||||
'Pronoun, adverbial interrogative', ['', '', '', 'PWAV'],
|
|
||||||
|
|
||||||
'Pronoun, substituting interrogative', ['', '', '', 'PWS'],
|
|
||||||
'Verb, main, finite', ['', '', '', 'VVFIN', '', 'V'],
|
|
||||||
'Verb, main, infinitive', ['', '', '', 'VVINF', '', 'V'],
|
|
||||||
'Verb, main, imperative', ['', '', '', 'VVIMP', '', 'V'],
|
|
||||||
'Verb, base present form (not infinitive)', ['VVB', 'VB', 'VBP', '', '', 'V'],
|
|
||||||
'Verb, infinitive', ['VVI', 'VB', 'VB', 'V', '', 'V'],
|
|
||||||
'Verb, past tense', ['VVD', 'VBD', 'VBD', '', '', 'V'],
|
|
||||||
'Verb, present participle', ['VVG', 'VBG', 'VBG', 'VAPP', '', 'V'],
|
|
||||||
'Verb, past/passive participle', ['VVN', 'VBN', 'VBN', 'VVPP', '', 'V'],
|
|
||||||
'Verb, present, 3SG, -s form', ['VVZ', 'VBZ', 'VBZ', '', '', 'V'],
|
|
||||||
'Verb, auxiliary', ['', '', '', 'VAFIN', '', 'V'],
|
|
||||||
'Verb, imperative', ['', '', '', 'VAIMP', '', 'V'],
|
|
||||||
'Verb, imperative infinitive', ['', '', '', 'VAINF', '', 'V'],
|
|
||||||
'Verb, auxiliary do, base', ['VDB', 'DO', 'VBP', '', '', 'V'],
|
|
||||||
'Verb, auxiliary do, infinitive', ['VDB', 'DO', 'VB', '', '', 'V'],
|
|
||||||
'Verb, auxiliary do, past', ['VDD', 'DOD', 'VBD', '', '', 'V'],
|
|
||||||
'Verb, auxiliary do, present participle', ['VDG', 'VBG', 'VBG', '', '', 'V'],
|
|
||||||
'Verb, auxiliary do, past participle', ['VDN', 'VBN', 'VBN', '', '', 'V'],
|
|
||||||
'Verb, auxiliary do, present 3SG', ['VDZ', 'DOZ', 'VBZ', '', '', 'V'],
|
|
||||||
'Verb, auxiliary have, base', ['VHB', 'HV', 'VBP', 'VA', '', 'V'],
|
|
||||||
'Verb, auxiliary have, infinitive', ['VHI', 'HV', 'VB', 'VAINF', '', 'V'],
|
|
||||||
'Verb, auxiliary have, past', ['VHD', 'HVD', 'VBD', 'VA', '', 'V'],
|
|
||||||
'Verb, auxiliary have, present participle', ['VHG', 'HVG', 'VBG', 'VA', '', 'V'],
|
|
||||||
'Verb, auxiliary have, past participle', ['VHN', 'HVN', 'VBN', 'VAPP', '', 'V'],
|
|
||||||
'Verb, auxiliary have, present 3SG', ['VHZ', 'HVZ', 'VBZ', 'VA', '', 'V'],
|
|
||||||
'Verb, auxiliary be, infinitive', ['VBI', 'BE', 'VB', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, past', ['VBD', 'BED', 'VBD', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, past, 3SG', ['VBD', 'BEDZ', 'VBD', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, present participle', ['VBG', 'BEG', 'VBG', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, past participle', ['VBN', 'BEN', 'VBN', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, present, 3SG', ['VBZ', 'BEZ', 'VBZ', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, present, 1SG', ['VBB', 'BEM', 'VBP', '', '', 'V'],
|
|
||||||
'Verb, auxiliary be, present', ['VBB', 'BER', 'VBP', '', '', 'V'],
|
|
||||||
'Verb, modal', ['VM0', 'MD', 'MD', 'VMFIN', 'VV', 'V'],
|
|
||||||
'Verb, modal', ['VM0', 'MD', 'MD', 'VMINF', 'VV', 'V'],
|
|
||||||
'Verb, modal, finite', ['', '', '', '', 'VMFIN', 'V'],
|
|
||||||
'Verb, modal, infinite', ['', '', '', '', 'VMINF', 'V'],
|
|
||||||
'Verb, modal, past participle', ['', '', '', '', 'VMPP', 'V'],
|
|
||||||
|
|
||||||
'Particle', ['', '', '', '', '', 'PRT'],
|
|
||||||
'Particle, with adverb', ['', '', '', 'PTKA', '', 'PRT'],
|
|
||||||
'Particle, answer', ['', '', '', 'PTKANT', '', 'PRT'],
|
|
||||||
'Particle, negation', ['', '', '', 'PTKNEG', '', 'PRT'],
|
|
||||||
'Particle, separated verb', ['', '', '', 'PTKVZ', '', 'PRT'],
|
|
||||||
'Particle, to as infinitive marker', ['TO0', 'TO', 'TO', 'PTKZU', '', 'PRT'],
|
|
||||||
|
|
||||||
'Preposition, comparative', ['', '', '', 'KOKOM', '', 'P'],
|
|
||||||
'Preposition, to', ['PRP', 'IN', 'TO', '', '', 'P'],
|
|
||||||
'Preposition', ['PRP', 'IN', 'IN', 'APPR', 'P', 'P'],
|
|
||||||
'Preposition, with aritcle', ['', '', '', 'APPART', '', 'P'],
|
|
||||||
'Preposition, of', ['PRF', 'IN', 'IN', '', '', 'P'],
|
|
||||||
|
|
||||||
'Possessive', ['POS', '$', 'POS'],
|
|
||||||
|
|
||||||
'Postposition', ['', '', '', 'APPO'],
|
|
||||||
|
|
||||||
'Circumposition, right', ['', '', '', 'APZR', ''],
|
|
||||||
|
|
||||||
'Interjection, onomatopoeia or other isolate', ['ITJ', 'UH', 'UH', 'ITJ', 'IJ'],
|
|
||||||
|
|
||||||
'Onomatopoeia', ['', '', '', '', 'ON'],
|
|
||||||
|
|
||||||
'Punctuation', ['', '', '', '', 'PU', 'PN'],
|
|
||||||
'Punctuation, sentence ender', ['PUN', '.', '.', '', '', 'PN'],
|
|
||||||
|
|
||||||
'Punctuation, semicolon', ['PUN', '.', '.', '', '', 'PN'],
|
|
||||||
'Puncutation, colon or ellipsis', ['PUN', ':', ':'],
|
|
||||||
'Punctuation, comma', ['PUN', ',', ',', '$,'],
|
|
||||||
'Punctuation, dash', ['PUN', '-', '-'],
|
|
||||||
'Punctuation, dollar sign', ['PUN', '', '$'],
|
|
||||||
'Punctuation, left bracket', ['PUL', '(', '(', '$('],
|
|
||||||
'Punctuation, right bracket', ['PUR', ')', ')'],
|
|
||||||
'Punctuation, quotation mark, left', ['PUQ', '', '``'],
|
|
||||||
'Punctuation, quotation mark, right', ['PUQ', '', '"'],
|
|
||||||
|
|
||||||
'Punctuation, left bracket', ['PUL', '(', 'PPL'],
|
|
||||||
'Punctuation, right bracket', ['PUR', ')', 'PPR'],
|
|
||||||
'Punctuation, left square bracket', ['PUL', '(', 'LSB'],
|
|
||||||
'Punctuation, right square bracket', ['PUR', ')', 'RSB'],
|
|
||||||
'Punctuation, left curly bracket', ['PUL', '(', 'LCB'],
|
|
||||||
'Punctuation, right curly bracket', ['PUR', ')', 'RCB'],
|
|
||||||
|
|
||||||
'Unknown, foreign words (not in lexicon)', ['UNZ', '(FW-)', 'FW', '', 'FW'],
|
|
||||||
|
|
||||||
'Symbol', ['', '', 'SYM', 'XY'],
|
|
||||||
'Symbol, alphabetical', ['ZZ0', '', ''],
|
|
||||||
'Symbol, list item', ['', '', 'LS'],
|
|
||||||
|
|
||||||
# Not sure about these tags from the Chinese PTB.
|
|
||||||
'Aspect marker', ['', '', '', '', 'AS'], # ?
|
|
||||||
'Ba-construction', ['', '', '', '', 'BA'], # ?
|
|
||||||
'In relative', ['', '', '', '', 'DEC'], # ?
|
|
||||||
'Associative', ['', '', '', '', 'DER'], # ?
|
|
||||||
'In V-de or V-de-R construct', ['', '', '', '', 'DER'], # ?
|
|
||||||
'For words ? ', ['', '', '', '', 'ETC'], # ?
|
|
||||||
'In long bei-construct', ['', '', '', '', 'LB'], # ?
|
|
||||||
'In short bei-construct', ['', '', '', '', 'SB'], # ?
|
|
||||||
'Sentence-nal particle', ['', '', '', '', 'SB'], # ?
|
|
||||||
'Particle, other', ['', '', '', '', 'MSP'], # ?
|
|
||||||
'Before VP', ['', '', '', '', 'DEV'], # ?
|
|
||||||
'Verb, ? as main verb', ['', '', '', '', 'VE'], # ?
|
|
||||||
'Verb, ????', ['', '', '', '', 'VC'] # ?
|
|
||||||
]},
|
|
||||||
enju: {
|
|
||||||
cat_to_category: {
|
|
||||||
'ADJ' => 'adjective',
|
|
||||||
'ADV' => 'adverb',
|
|
||||||
'CONJ' => 'conjunction',
|
|
||||||
'COOD' => 'conjunction',
|
|
||||||
'C' => 'complementizer',
|
|
||||||
'D' => 'determiner',
|
|
||||||
'N' => 'noun',
|
|
||||||
'P' => 'preposition',
|
|
||||||
'PN' => 'punctuation',
|
|
||||||
'SC' => 'conjunction',
|
|
||||||
'V' => 'verb',
|
|
||||||
'PRT' => 'particle'
|
|
||||||
},
|
|
||||||
cat_to_description: [
|
|
||||||
['ADJ', 'Adjective'],
|
|
||||||
['ADV', 'Adverb'],
|
|
||||||
['CONJ', 'Coordination conjunction'],
|
|
||||||
['C', 'Complementizer'],
|
|
||||||
['D', 'Determiner'],
|
|
||||||
['N', 'Noun'],
|
|
||||||
['P', 'Preposition'],
|
|
||||||
['SC', 'Subordination conjunction'],
|
|
||||||
['V', 'Verb'],
|
|
||||||
['COOD', 'Part of coordination'],
|
|
||||||
['PN', 'Punctuation'],
|
|
||||||
['PRT', 'Particle'],
|
|
||||||
['S', 'Sentence']
|
|
||||||
],
|
|
||||||
xcat_to_description: [
|
|
||||||
['COOD', 'Coordinated phrase/clause'],
|
|
||||||
['IMP', 'Imperative sentence'],
|
|
||||||
['INV', 'Subject-verb inversion'],
|
|
||||||
['Q', 'Interrogative sentence with subject-verb inversion'],
|
|
||||||
['REL', 'A relativizer included'],
|
|
||||||
['FREL', 'A free relative included'],
|
|
||||||
['TRACE', 'A trace included'],
|
|
||||||
['WH', 'A wh-question word included']
|
|
||||||
],
|
|
||||||
xcat_to_ptb: [
|
|
||||||
['ADJP', '', 'ADJP'],
|
|
||||||
['ADJP', 'REL', 'WHADJP'],
|
|
||||||
['ADJP', 'FREL', 'WHADJP'],
|
|
||||||
['ADJP', 'WH', 'WHADJP'],
|
|
||||||
['ADVP', '', 'ADVP'],
|
|
||||||
['ADVP', 'REL', 'WHADVP'],
|
|
||||||
['ADVP', 'FREL', 'WHADVP'],
|
|
||||||
['ADVP', 'WH', 'WHADVP'],
|
|
||||||
['CONJP', '', 'CONJP'],
|
|
||||||
['CP', '', 'SBAR'],
|
|
||||||
['DP', '', 'NP'],
|
|
||||||
['NP', '', 'NP'],
|
|
||||||
['NX', 'NX', 'NAC'],
|
|
||||||
['NP' 'REL' 'WHNP'],
|
|
||||||
['NP' 'FREL' 'WHNP'],
|
|
||||||
['NP' 'WH' 'WHNP'],
|
|
||||||
['PP', '', 'PP'],
|
|
||||||
['PP', 'REL', 'WHPP'],
|
|
||||||
['PP', 'WH', 'WHPP'],
|
|
||||||
['PRT', '', 'PRT'],
|
|
||||||
['S', '', 'S'],
|
|
||||||
['S', 'INV', 'SINV'],
|
|
||||||
['S', 'Q', 'SQ'],
|
|
||||||
['S', 'REL', 'SBAR'],
|
|
||||||
['S', 'FREL', 'SBAR'],
|
|
||||||
['S', 'WH', 'SBARQ'],
|
|
||||||
['SCP', '', 'SBAR'],
|
|
||||||
['VP', '', 'VP'],
|
|
||||||
['VP', '', 'VP'],
|
|
||||||
['', '', 'UK']
|
|
||||||
]},
|
|
||||||
paris7: {
|
|
||||||
tag_to_category: {
|
|
||||||
'C' => :complementizer,
|
|
||||||
'PN' => :punctuation,
|
|
||||||
'SC' => :conjunction
|
|
||||||
}
|
|
||||||
# Paris7 Treebank functional tags
|
|
||||||
=begin
|
|
||||||
SUJ (subject)
|
|
||||||
OBJ (direct object)
|
|
||||||
ATS (predicative complement of a subject)
|
|
||||||
ATO (predicative complement of a direct object)
|
|
||||||
MOD (modifier or adjunct)
|
|
||||||
A-OBJ (indirect complement introduced by à)
|
|
||||||
DE-OBJ (indirect complement introduced by de)
|
|
||||||
P-OBJ (indirect complement introduced by another preposition)
|
|
||||||
=end
|
|
||||||
},
|
|
||||||
ptb: {
|
|
||||||
escape_characters: {
|
|
||||||
'(' => '-LRB-',
|
|
||||||
')' => '-RRB-',
|
|
||||||
'[' => '-LSB-',
|
|
||||||
']' => '-RSB-',
|
|
||||||
'{' => '-LCB-',
|
|
||||||
'}' => '-RCB-'
|
|
||||||
},
|
|
||||||
phrase_tag_to_description: [
|
|
||||||
['S', 'Paris7 declarative clause'],
|
|
||||||
['SBAR', 'Clause introduced by a (possibly empty) subordinating conjunction'],
|
|
||||||
['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
|
|
||||||
['SINV', 'Inverted declarative sentence'],
|
|
||||||
['SQ', 'Inverted yes/no question']
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1 @@
{adapter: :mongo}

@ -0,0 +1 @@
{host: 'localhost', port: '27017', db: nil }
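The two one-line files above set the default database adapter (:mongo) and its connection defaults. A minimal sketch of overriding them at runtime; the nested Treat.databases.mongo accessors are an assumption inferred from Treat.databases.default.adapter used later in this diff, and the database name is made up:

    require 'treat'

    Treat.databases.default.adapter = :mongo       # mirrors {adapter: :mongo}
    Treat.databases.mongo.host = 'localhost'       # assumed accessor names
    Treat.databases.mongo.port = '27017'
    Treat.databases.mongo.db   = 'treat_example'   # hypothetical database name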
@ -1,31 +0,0 @@
# Mixin that is extended by Treat::Config
# in order to provide a single point of
# access method to trigger the import.
module Treat::Config::Importable

  # Import relies on each configuration.
  require_relative 'configurable'

  # Store all the configuration in self.config
  def self.extended(base)
    class << base; attr_accessor :config; end
  end

  # Main function; loads all configuration options.
  def import!
    config, c = {}, Treat::Config::Configurable
    definition = :define_singleton_method
    Treat::Config.constants.each do |const|
      next if const.to_s.downcase.is_mixin?
      klass = Treat::Config.const_get(const)
      klass.class_eval { extend c }.configure!
      name = const.to_s.downcase.intern
      config[name] = klass.config
      Treat.send(definition, name) do
        Treat::Config.config[name]
      end
    end
    self.config = config.to_struct
  end

end
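For context, a sketch of how this (now removed) mixin was used; the call sequence is inferred from the code above rather than shown elsewhere in the diff, and the Core class name is an assumption:

    # Extend the config root, then trigger the import; afterwards each
    # configuration class is reachable as a singleton reader on Treat.
    Treat::Config.extend(Treat::Config::Importable)
    Treat::Config.import!

    Treat.paths   # => struct built by Treat::Config::Paths
    Treat.core    # => struct built by Treat::Config::Core (assumed class name)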
@ -0,0 +1,34 @@
{
  dependencies: [
    'psych',
    'nokogiri',
    'ferret',
    'bson_ext',
    'mongo',
    'lda-ruby',
    'stanford-core-nlp',
    'linguistics',
    'ruby-readability',
    'whatlanguage',
    'chronic',
    'nickel',
    'decisiontree',
    'ai4r'
  ],
  workers: {
    extractors: {
      keywords: [:tf_idf],
      language: [:what_language]
    },
    formatters: {
      serializers: [:xml, :yaml, :mongo]
    },
    lexicalizers: {
      categorizers: [:from_tag]
    },
    inflectors: {
      ordinalizers: [:linguistics],
      cardinalizers: [:linguistics]
    }
  }
}
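The workers listed above are the language-agnostic defaults. As a hedged illustration only (the #serialize task name is an assumption, not confirmed by this diff), the serializer symbols are what a call like the following would dispatch to:

    # Serialize a document with one of the formatters listed above.
    document.serialize(:mongo)   # or :xml / :yaml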
@ -6,7 +6,7 @@
   workers: {
     processors: {
       segmenters: [:punkt],
-      tokenizers: []
+      tokenizers: [:tactful]
     }
   }
 }
@ -0,0 +1,60 @@
|
||||||
|
{
|
||||||
|
dependencies: [
|
||||||
|
'rbtagger',
|
||||||
|
'ruby-stemmer',
|
||||||
|
'punkt-segmenter',
|
||||||
|
'tactful_tokenizer',
|
||||||
|
'nickel',
|
||||||
|
'rwordnet',
|
||||||
|
'uea-stemmer',
|
||||||
|
'engtagger',
|
||||||
|
'activesupport',
|
||||||
|
'english'
|
||||||
|
],
|
||||||
|
workers: {
|
||||||
|
extractors: {
|
||||||
|
time: [:chronic, :ruby, :nickel],
|
||||||
|
topics: [:reuters],
|
||||||
|
keywords: [:tf_idf],
|
||||||
|
name_tag: [:stanford],
|
||||||
|
coreferences: [:stanford]
|
||||||
|
},
|
||||||
|
inflectors: {
|
||||||
|
conjugators: [:linguistics],
|
||||||
|
declensors: [:english, :linguistics, :active_support],
|
||||||
|
stemmers: [:porter, :porter_c, :uea],
|
||||||
|
ordinalizers: [:linguistics],
|
||||||
|
cardinalizers: [:linguistics]
|
||||||
|
},
|
||||||
|
lexicalizers: {
|
||||||
|
taggers: [:lingua, :brill, :stanford],
|
||||||
|
sensers: [:wordnet]
|
||||||
|
},
|
||||||
|
processors: {
|
||||||
|
parsers: [:stanford, :enju],
|
||||||
|
segmenters: [:tactful, :punkt, :stanford],
|
||||||
|
tokenizers: [:ptb, :stanford, :tactful, :punkt]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
info: {
|
||||||
|
stopwords:
|
||||||
|
['the', 'of', 'and', 'a', 'to', 'in', 'is',
|
||||||
|
'you', 'that', 'it', 'he', 'was', 'for', 'on',
|
||||||
|
'are', 'as', 'with', 'his', 'they', 'I', 'at',
|
||||||
|
'be', 'this', 'have', 'from', 'or', 'one', 'had',
|
||||||
|
'by', 'word', 'but', 'not', 'what', 'all', 'were',
|
||||||
|
'we', 'when', 'your', 'can', 'said', 'there', 'use',
|
||||||
|
'an', 'each', 'which', 'she', 'do', 'how', 'their',
|
||||||
|
'if', 'will', 'up', 'other', 'about', 'out', 'many',
|
||||||
|
'then', 'them', 'these', 'so', 'some', 'her', 'would',
|
||||||
|
'make', 'like', 'him', 'into', 'time', 'has', 'look',
|
||||||
|
'two', 'more', 'write', 'go', 'see', 'number', 'no',
|
||||||
|
'way', 'could', 'people', 'my', 'than', 'first', 'been',
|
||||||
|
'call', 'who', 'its', 'now', 'find', 'long', 'down',
|
||||||
|
'day', 'did', 'get', 'come', 'made', 'may', 'part',
|
||||||
|
'say', 'also', 'new', 'much', 'should', 'still',
|
||||||
|
'such', 'before', 'after', 'other', 'then', 'over',
|
||||||
|
'under', 'therefore', 'nonetheless', 'thereafter',
|
||||||
|
'afterwards', 'here', 'huh', 'hah', "n't", "'t", 'here']
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,18 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer',
    'stanford-core-nlp'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
  }
}
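A sketch of how a per-language worker list like this one is consumed: the task method picks the listed worker for that category. Entity construction via .build is taken from the DSL code later in this diff; the Sentence class and the #tokenize / #parse task names are assumptions for illustration:

    sentence = Treat::Entities::Sentence.build(
      'The Stanford parser handles this sentence.')
    sentence.tokenize(:tactful)   # tokenizers: [:tactful]
    sentence.parse(:stanford)     # parsers:    [:stanford]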
@ -0,0 +1,18 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer',
    'stanford'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful],
      parsers: [:stanford]
    },
    lexicalizers: {
      taggers: [:stanford],
      categorizers: [:from_tag]
    }
  }
}
@ -6,7 +6,7 @@
   workers: {
     processors: {
       segmenters: [:punkt],
-      tokenizers: []
+      tokenizers: [:tactful]
     }
   }
 }
@ -0,0 +1,12 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful]
    }
  }
}
@ -0,0 +1,12 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful]
    }
  }
}
@ -6,7 +6,7 @@
   workers: {
     processors: {
       segmenters: [:punkt],
-      tokenizers: []
+      tokenizers: [:tactful]
     }
   }
 }
@ -6,7 +6,7 @@
   workers: {
     processors: {
       segmenters: [:punkt],
-      tokenizers: []
+      tokenizers: [:tactful]
     }
   }
 }
@ -0,0 +1,12 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful]
    }
  }
}
@ -0,0 +1,12 @@
{
  dependencies: [
    'punkt-segmenter',
    'tactful_tokenizer'
  ],
  workers: {
    processors: {
      segmenters: [:punkt],
      tokenizers: [:tactful]
    }
  }
}
@ -0,0 +1 @@
{jar_path: nil, model_path: nil}
@ -0,0 +1,4 @@
['adjective', 'adverb', 'noun', 'verb', 'interjection',
 'clitic', 'coverb', 'conjunction', 'determiner', 'particle',
 'preposition', 'pronoun', 'number', 'symbol', 'punctuation',
 'complementizer']
@ -0,0 +1,33 @@
{punct_to_category: {
  '.' => 'period',
  ',' => 'comma',
  ';' => 'semicolon',
  ':' => 'colon',
  '?' => 'interrogation',
  '!' => 'exclamation',
  '"' => 'double_quote',
  "'" => 'single_quote',
  '$' => 'dollar',
  '%' => 'percent',
  '#' => 'hash',
  '*' => 'asterisk',
  '&' => 'ampersand',
  '+' => 'plus',
  '-' => 'dash',
  '/' => 'slash',
  '\\' => 'backslash',
  '^' => 'caret',
  '_' => 'underscore',
  '`' => 'tick',
  '|' => 'pipe',
  '~' => 'tilde',
  '@' => 'at',
  '[' => 'bracket',
  ']' => 'bracket',
  '{' => 'brace',
  '}' => 'brace',
  '(' => 'parenthesis',
  ')' => 'parenthesis',
  '<' => 'tag',
  '>' => 'tag'
}}
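Given the hash above loaded as punct, categorizing a punctuation character is a plain lookup (sketch):

    punct = {punct_to_category: {';' => 'semicolon', '?' => 'interrogation'}} # excerpt of the map above
    punct[:punct_to_category][';']   # => 'semicolon'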
@ -1,23 +0,0 @@
# Generates the following path config options:
# Treat.paths.tmp, Treat.paths.bin, Treat.paths.lib,
# Treat.paths.models, Treat.paths.files, Treat.paths.spec.
class Treat::Config::Paths

  # Get the path configuration based on the
  # directory structure loaded into Paths.
  # Note that this doesn't call super, as
  # there are no external config files to load.
  def self.configure!
    root = File.dirname(File.expand_path( # FIXME
      __FILE__)).split('/')[0..-4].join('/') + '/'
    self.config = Hash[
      # Get a list of directories in treat/
      Dir.glob(root + '*').select do |path|
        FileTest.directory?(path)
      # Map to pairs of [:name, path]
      end.map do |path|
        [File.basename(path).intern, path + '/']
      end]
  end

end
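A sketch of what configure! produced: every top-level directory of the gem became a [:name, path] pair in the config hash, which the Importable mixin then exposed as Treat.paths. The example return values are placeholders:

    Treat::Config::Paths.configure!
    Treat::Config::Paths.config[:lib]    # => ".../treat/lib/"  (example value)
    Treat::Config::Paths.config[:spec]   # => ".../treat/spec/" (example value)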
@ -1,37 +0,0 @@
# Handles all configuration related
# to understanding of part of speech
# and phrasal tags.
class Treat::Config::Tags

  # Generate a map of word and phrase tags
  # to their syntactic category, keyed by
  # tag set.
  def self.configure!
    super
    config = self.config[:aligned].dup
    word_tags, phrase_tags, tag_sets =
      config[:word_tags], config[:phrase_tags]
    tag_sets = config[:tag_sets]
    config[:word_tags_to_category] =
      align_tags(word_tags, tag_sets)
    config[:phrase_tags_to_category] =
      align_tags(phrase_tags, tag_sets)
    self.config[:aligned] = config
  end

  # Helper methods for tag set config.
  # Align tag tags in the tag set
  def self.align_tags(tags, tag_sets)
    wttc = {}
    tags.each_slice(2) do |desc, tags|
      category = desc.gsub(',', ' ,').
        split(' ')[0].downcase
      tag_sets.each_with_index do |tag_set, i|
        next unless tags[i]
        wttc[tags[i]] ||= {}
        wttc[tags[i]][tag_set] = category
      end
    end; return wttc
  end

end
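A worked example of align_tags from the code above, using one row of the aligned word_tags table and the tag_sets list; the return value shows how each concrete tag maps back to its category per tag set:

    row  = ['Adjective, comparative',
            ['AJC', 'JJR', 'JJR', 'KOKOM', '', 'ADJ']]
    sets = [:claws_c5, :brown, :penn, :stutgart, :chinese, :paris7]

    Treat::Config::Tags.align_tags(row, sets)['JJR']
    # => { :brown => 'adjective', :penn => 'adjective' }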
@ -0,0 +1,221 @@
|
||||||
|
{tag_sets: [
|
||||||
|
:claws_c5, :brown, :penn, :stutgart, :chinese, :paris7
|
||||||
|
],
|
||||||
|
phrase_tags: [
|
||||||
|
'Adjectival phrase', ['', '', 'ADJP', '', '', 'AP'],
|
||||||
|
'Adverbial phrase', ['', '', 'ADVP', '', '', 'AdP'],
|
||||||
|
'Conjunction phrase', ['', '', 'CONJP', '', '', 'Ssub'],
|
||||||
|
'Fragment', ['', '', 'FRAG', '', '', ''],
|
||||||
|
'Interjectional phrase', ['', '', 'INTJ', '', '', ''],
|
||||||
|
'List marker', ['', '', 'LST', '', '', ''],
|
||||||
|
'Not a phrase', ['', '', 'NAC', '', '', ''],
|
||||||
|
'Noun phrase', ['', '', 'NP', '', '', 'NP'],
|
||||||
|
'Verbal nucleus', ['', '', '', '', '', 'VN'],
|
||||||
|
'Head of noun phrase', ['', '', 'NX', '', '', ''],
|
||||||
|
'Prepositional phrase', ['', '', 'PP', '', '', 'PP'],
|
||||||
|
'Parenthetical', ['', '', 'PRN', '', '', ''],
|
||||||
|
'Particle', ['', '', 'PRT', '', '', ''],
|
||||||
|
'Participial phrase', ['', '', '', '', '', 'VPart'],
|
||||||
|
'Quantifier phrase', ['', '', 'QP', '', '', ''],
|
||||||
|
'Relative clause', ['', '', 'RRC', '', '', 'Srel'],
|
||||||
|
'Coordinated phrase', ['', '', 'UCP', '', '', 'COORD'],
|
||||||
|
'Infinitival phrase', ['', '', '', '', '', 'VPinf'],
|
||||||
|
'Verb phrase', ['', '', 'VP', '', '', ''],
|
||||||
|
'Wh adjective phrase', ['', '', 'WHADJP', '', '', ''],
|
||||||
|
'Wh adverb phrase', ['', '', 'WHAVP', '', '', ''],
|
||||||
|
'Wh noun phrase', ['', '', 'WHNP', '', '', ''],
|
||||||
|
'Wh prepositional phrase', ['', '', 'WHPP', '', '', ''],
|
||||||
|
'Unknown', ['', '', 'X', '', '', ''],
|
||||||
|
'Phrase', ['', '', 'P', '', '', 'Sint'],
|
||||||
|
'Sentence', ['', '', 'S', '', '', 'SENT'],
|
||||||
|
'Phrase', ['', '', 'SBAR', '', '', ''] # Fix
|
||||||
|
],
|
||||||
|
word_tags: [
|
||||||
|
|
||||||
|
# Aligned tags for the Claws C5, Brown and Penn tag sets.
|
||||||
|
# Adapted from Manning, Christopher and Schütze, Hinrich,
|
||||||
|
# 1999. Foundations of Statistical Natural Language
|
||||||
|
# Processing. MIT Press, p. 141-142;
|
||||||
|
# http://www.isocat.org/rest/dcs/376;
|
||||||
|
|
||||||
|
'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'A'],
|
||||||
|
'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'ADJ'],
|
||||||
|
'Ajective, adverbial or predicative', ['', '', '', 'ADJD', '', 'ADJ'],
|
||||||
|
'Adjective, attribute', ['', '', '', 'ADJA', 'VA', 'ADJ'],
|
||||||
|
'Adjective, ordinal number', ['ORD', 'OD', 'JJ', '', 'OD', 'ADJ'],
|
||||||
|
'Adjective, comparative', ['AJC', 'JJR', 'JJR', 'KOKOM', '', 'ADJ'],
|
||||||
|
'Adjective, superlative', ['AJS', 'JJT', 'JJS', '', 'JJ', 'ADJ'],
|
||||||
|
'Adjective, superlative, semantically', ['AJ0', 'JJS', 'JJ', '', '', 'ADJ'],
|
||||||
|
'Adjective, cardinal number', ['CRD', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],
|
||||||
|
'Adjective, cardinal number, one', ['PNI', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],
|
||||||
|
|
||||||
|
'Adverb', ['AV0', 'RB', 'RB', 'ADV', 'AD', 'ADV'],
|
||||||
|
'Adverb, negative', ['XX0', '*', 'RB', 'PTKNEG', '', 'ADV'],
|
||||||
|
'Adverb, comparative', ['AV0', 'RBR', 'RBR', '', 'AD', 'ADV'],
|
||||||
|
'Adverb, superlative', ['AV0', 'RBT', 'RBS', '', 'AD', 'ADV'],
|
||||||
|
'Adverb, particle', ['AVP', 'RP', 'RP', '', '', 'ADV'],
|
||||||
|
'Adverb, question', ['AVQ', 'WRB', 'WRB', '', 'AD', 'ADV'],
|
||||||
|
'Adverb, degree & question', ['AVQ', 'WQL', 'WRB', '', 'ADV'],
|
||||||
|
'Adverb, degree', ['AV0', 'QL', 'RB', '', '', 'ADV'],
|
||||||
|
'Adverb, degree, postposed', ['AV0', 'QLP', 'RB', '', '', 'ADV'],
|
||||||
|
'Adverb, nominal', ['AV0', 'RN', 'RB', 'PROP', '', 'ADV'],
|
||||||
|
'Adverb, pronominal', ['', '', '', '', 'PROP', '', 'ADV'],
|
||||||
|
|
||||||
|
'Conjunction, coordination', ['CJC', 'CC', 'CC', 'KON', 'CC', 'COOD'],
|
||||||
|
'Conjunction, coordination, and', ['CJC', 'CC', 'CC', 'KON', 'CC', 'ET'],
|
||||||
|
'Conjunction, subordination', ['CJS', 'CS', 'IN', 'KOUS', 'CS', 'CONJ'],
|
||||||
|
'Conjunction, subordination with to and infinitive', ['', '', '', 'KOUI', '', ''],
|
||||||
|
'Conjunction, complementizer, that', ['CJT', 'CS', 'IN', '', '', 'C'],
|
||||||
|
|
||||||
|
'Determiner', ['DT0', 'DT', 'DT', '', 'DT', 'D'],
|
||||||
|
'Determiner, pronoun', ['DT0', 'DTI', 'DT', '', '', 'D'],
|
||||||
|
'Determiner, pronoun, plural', ['DT0', 'DTS', 'DT', '', '', 'D'],
|
||||||
|
'Determiner, prequalifier', ['DT0', 'ABL', 'DT', '', '', 'D'],
|
||||||
|
'Determiner, prequantifier', ['DT0', 'ABN', 'PDT', '', 'DT', 'D'],
|
||||||
|
'Determiner, pronoun or double conjunction', ['DT0', 'ABX', 'PDT', '', '', 'D'],
|
||||||
|
'Determiner, pronoun or double conjunction', ['DT0', 'DTX', 'DT', '', '', 'D'],
|
||||||
|
'Determiner, article', ['AT0', 'AT', 'DT', 'ART', '', 'D'],
|
||||||
|
'Determiner, postdeterminer', ['DT0', 'AP', 'DT', '', '', 'D'],
|
||||||
|
'Determiner, possessive', ['DPS', 'PP$', 'PRP$', '', '', 'D'],
|
||||||
|
'Determiner, possessive, second', ['DPS', 'PP$', 'PRPS', '', '', 'D'],
|
||||||
|
'Determiner, question', ['DTQ', 'WDT', 'WDT', '', 'DT', 'D'],
|
||||||
|
'Determiner, possessive & question', ['DTQ', 'WP$', 'WP$', '', '', 'D'],
|
||||||
|
'Interjection', ['', '', '', '', '', 'I'],
|
||||||
|
'Localizer', ['', '', '', '', 'LC'],
|
||||||
|
|
||||||
|
'Measure word', ['', '', '', '', 'M'],
|
||||||
|
|
||||||
|
'Noun, common', ['NN0', 'NN', 'NN', 'N', 'NN', 'NN'],
|
||||||
|
'Noun, singular', ['NN1', 'NN', 'NN', 'NN', 'NN', 'N'],
|
||||||
|
'Noun, plural', ['NN2', 'NNS', 'NNS', 'NN', 'NN', 'N'],
|
||||||
|
'Noun, proper, singular', ['NP0', 'NP', 'NNP', 'NE', 'NR', 'N'],
|
||||||
|
'Noun, proper, plural', ['NP0', 'NPS', 'NNPS', 'NE', 'NR', 'N'],
|
||||||
|
'Noun, adverbial', ['NN0', 'NR', 'NN', 'NE', '', 'N'],
|
||||||
|
'Noun, adverbial, plural', ['NN2', 'NRS', 'NNS', '', 'N'],
|
||||||
|
'Noun, temporal', ['', '', '', '', 'NT', 'N'],
|
||||||
|
'Noun, verbal', ['', '', '', '', 'NN', 'N'],
|
||||||
|
|
||||||
|
'Pronoun, nominal (indefinite)', ['PNI', 'PN', 'PRP', '', 'PN', 'CL'],
|
||||||
|
'Pronoun, personal, subject', ['PNP', 'PPSS', 'PRP', 'PPER'],
|
||||||
|
'Pronoun, personal, subject, 3SG', ['PNP', 'PPS', 'PRP', 'PPER'],
|
||||||
|
'Pronoun, personal, object', ['PNP', 'PPO', 'PRP', 'PPER'],
|
||||||
|
'Pronoun, reflexive', ['PNX', 'PPL', 'PRP', 'PRF'],
|
||||||
|
'Pronoun, reflexive, plural', ['PNX', 'PPLS', 'PRP', 'PRF'],
|
||||||
|
'Pronoun, question, subject', ['PNQ', 'WPS', 'WP', 'PWAV'],
|
||||||
|
'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'], # Hack
|
||||||
|
'Pronoun, question, object', ['PNQ', 'WPO', 'WP', 'PWAV', 'PWAT'],
|
||||||
|
'Pronoun, existential there', ['EX0', 'EX', 'EX'],
|
||||||
|
'Pronoun, attributive demonstrative', ['', '', '', 'PDAT'],
|
||||||
|
'Prounoun, attributive indefinite without determiner', ['', '', '', 'PIAT'],
|
||||||
|
'Pronoun, attributive possessive', ['', '', '', 'PPOSAT', ''],
|
||||||
|
'Pronoun, substituting demonstrative', ['', '', '', 'PDS'],
|
||||||
|
'Pronoun, substituting possessive', ['', '', '', 'PPOSS', ''],
|
||||||
|
'Prounoun, substituting indefinite', ['', '', '', 'PIS'],
|
||||||
|
'Pronoun, attributive relative', ['', '', '', 'PRELAT', ''],
|
||||||
|
'Pronoun, substituting relative', ['', '', '', 'PRELS', ''],
|
||||||
|
'Pronoun, attributive interrogative', ['', '', '', 'PWAT'],
|
||||||
|
'Pronoun, adverbial interrogative', ['', '', '', 'PWAV'],
|
||||||
|
|
||||||
|
'Pronoun, substituting interrogative', ['', '', '', 'PWS'],
|
||||||
|
'Verb, main, finite', ['', '', '', 'VVFIN', '', 'V'],
|
||||||
|
'Verb, main, infinitive', ['', '', '', 'VVINF', '', 'V'],
|
||||||
|
'Verb, main, imperative', ['', '', '', 'VVIMP', '', 'V'],
|
||||||
|
'Verb, base present form (not infinitive)', ['VVB', 'VB', 'VBP', '', '', 'V'],
|
||||||
|
'Verb, infinitive', ['VVI', 'VB', 'VB', 'V', '', 'V'],
|
||||||
|
'Verb, past tense', ['VVD', 'VBD', 'VBD', '', '', 'V'],
|
||||||
|
'Verb, present participle', ['VVG', 'VBG', 'VBG', 'VAPP', '', 'V'],
|
||||||
|
'Verb, past/passive participle', ['VVN', 'VBN', 'VBN', 'VVPP', '', 'V'],
|
||||||
|
'Verb, present, 3SG, -s form', ['VVZ', 'VBZ', 'VBZ', '', '', 'V'],
|
||||||
|
'Verb, auxiliary', ['', '', '', 'VAFIN', '', 'V'],
|
||||||
|
'Verb, imperative', ['', '', '', 'VAIMP', '', 'V'],
|
||||||
|
'Verb, imperative infinitive', ['', '', '', 'VAINF', '', 'V'],
|
||||||
|
'Verb, auxiliary do, base', ['VDB', 'DO', 'VBP', '', '', 'V'],
|
||||||
|
'Verb, auxiliary do, infinitive', ['VDB', 'DO', 'VB', '', '', 'V'],
|
||||||
|
'Verb, auxiliary do, past', ['VDD', 'DOD', 'VBD', '', '', 'V'],
|
||||||
|
'Verb, auxiliary do, present participle', ['VDG', 'VBG', 'VBG', '', '', 'V'],
|
||||||
|
'Verb, auxiliary do, past participle', ['VDN', 'VBN', 'VBN', '', '', 'V'],
|
||||||
|
'Verb, auxiliary do, present 3SG', ['VDZ', 'DOZ', 'VBZ', '', '', 'V'],
|
||||||
|
'Verb, auxiliary have, base', ['VHB', 'HV', 'VBP', 'VA', '', 'V'],
|
||||||
|
'Verb, auxiliary have, infinitive', ['VHI', 'HV', 'VB', 'VAINF', '', 'V'],
|
||||||
|
'Verb, auxiliary have, past', ['VHD', 'HVD', 'VBD', 'VA', '', 'V'],
|
||||||
|
'Verb, auxiliary have, present participle', ['VHG', 'HVG', 'VBG', 'VA', '', 'V'],
|
||||||
|
'Verb, auxiliary have, past participle', ['VHN', 'HVN', 'VBN', 'VAPP', '', 'V'],
|
||||||
|
'Verb, auxiliary have, present 3SG', ['VHZ', 'HVZ', 'VBZ', 'VA', '', 'V'],
|
||||||
|
'Verb, auxiliary be, infinitive', ['VBI', 'BE', 'VB', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, past', ['VBD', 'BED', 'VBD', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, past, 3SG', ['VBD', 'BEDZ', 'VBD', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, present participle', ['VBG', 'BEG', 'VBG', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, past participle', ['VBN', 'BEN', 'VBN', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, present, 3SG', ['VBZ', 'BEZ', 'VBZ', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, present, 1SG', ['VBB', 'BEM', 'VBP', '', '', 'V'],
|
||||||
|
'Verb, auxiliary be, present', ['VBB', 'BER', 'VBP', '', '', 'V'],
|
||||||
|
'Verb, modal', ['VM0', 'MD', 'MD', 'VMFIN', 'VV', 'V'],
|
||||||
|
'Verb, modal', ['VM0', 'MD', 'MD', 'VMINF', 'VV', 'V'],
|
||||||
|
'Verb, modal, finite', ['', '', '', '', 'VMFIN', 'V'],
|
||||||
|
'Verb, modal, infinite', ['', '', '', '', 'VMINF', 'V'],
|
||||||
|
'Verb, modal, past participle', ['', '', '', '', 'VMPP', 'V'],
|
||||||
|
|
||||||
|
'Particle', ['', '', '', '', '', 'PRT'],
|
||||||
|
'Particle, with adverb', ['', '', '', 'PTKA', '', 'PRT'],
|
||||||
|
'Particle, answer', ['', '', '', 'PTKANT', '', 'PRT'],
|
||||||
|
'Particle, negation', ['', '', '', 'PTKNEG', '', 'PRT'],
|
||||||
|
'Particle, separated verb', ['', '', '', 'PTKVZ', '', 'PRT'],
|
||||||
|
'Particle, to as infinitive marker', ['TO0', 'TO', 'TO', 'PTKZU', '', 'PRT'],
|
||||||
|
|
||||||
|
'Preposition, comparative', ['', '', '', 'KOKOM', '', 'P'],
|
||||||
|
'Preposition, to', ['PRP', 'IN', 'TO', '', '', 'P'],
|
||||||
|
'Preposition', ['PRP', 'IN', 'IN', 'APPR', 'P', 'P'],
|
||||||
|
'Preposition, with aritcle', ['', '', '', 'APPART', '', 'P'],
|
||||||
|
'Preposition, of', ['PRF', 'IN', 'IN', '', '', 'P'],
|
||||||
|
|
||||||
|
'Possessive', ['POS', '$', 'POS'],
|
||||||
|
|
||||||
|
'Postposition', ['', '', '', 'APPO'],
|
||||||
|
|
||||||
|
'Circumposition, right', ['', '', '', 'APZR', ''],
|
||||||
|
|
||||||
|
'Interjection, onomatopoeia or other isolate', ['ITJ', 'UH', 'UH', 'ITJ', 'IJ'],
|
||||||
|
|
||||||
|
'Onomatopoeia', ['', '', '', '', 'ON'],
|
||||||
|
|
||||||
|
'Punctuation', ['', '', '', '', 'PU', 'PN'],
|
||||||
|
'Punctuation, sentence ender', ['PUN', '.', '.', '', '', 'PN'],
|
||||||
|
|
||||||
|
'Punctuation, semicolon', ['PUN', '.', '.', '', '', 'PN'],
|
||||||
|
'Puncutation, colon or ellipsis', ['PUN', ':', ':'],
|
||||||
|
'Punctuationm, comma', ['PUN', ',', ',', '$,'],
|
||||||
|
'Punctuation, dash', ['PUN', '-', '-'],
|
||||||
|
'Punctuation, dollar sign', ['PUN', '', '$'],
|
||||||
|
'Punctuation, left bracket', ['PUL', '(', '(', '$('],
|
||||||
|
'Punctuation, right bracket', ['PUR', ')', ')'],
|
||||||
|
'Punctuation, quotation mark, left', ['PUQ', '', '``'],
|
||||||
|
'Punctuation, quotation mark, right', ['PUQ', '', '"'],
|
||||||
|
|
||||||
|
'Punctuation, left bracket', ['PUL', '(', 'PPL'],
|
||||||
|
'Punctuation, right bracket', ['PUR', ')', 'PPR'],
|
||||||
|
'Punctuation, left square bracket', ['PUL', '(', 'LSB'],
|
||||||
|
'Punctuation, right square bracket', ['PUR', ')', 'RSB'],
|
||||||
|
'Punctuation, left curly bracket', ['PUL', '(', 'LCB'],
|
||||||
|
'Punctuation, right curly bracket', ['PUR', ')', 'RCB'],
|
||||||
|
|
||||||
|
'Unknown, foreign words (not in lexicon)', ['UNZ', '(FW-)', 'FW', '', 'FW'],
|
||||||
|
|
||||||
|
'Symbol', ['', '', 'SYM', 'XY'],
|
||||||
|
'Symbol, alphabetical', ['ZZ0', '', ''],
|
||||||
|
'Symbol, list item', ['', '', 'LS'],
|
||||||
|
|
||||||
|
# Not sure about these tags from the Chinese PTB.
|
||||||
|
'Aspect marker', ['', '', '', '', 'AS'], # ?
|
||||||
|
'Ba-construction', ['', '', '', '', 'BA'], # ?
|
||||||
|
'In relative', ['', '', '', '', 'DEC'], # ?
|
||||||
|
'Associative', ['', '', '', '', 'DER'], # ?
|
||||||
|
'In V-de or V-de-R construct', ['', '', '', '', 'DER'], # ?
|
||||||
|
'For words ? ', ['', '', '', '', 'ETC'], # ?
|
||||||
|
'In long bei-construct', ['', '', '', '', 'LB'], # ?
|
||||||
|
'In short bei-construct', ['', '', '', '', 'SB'], # ?
|
||||||
|
'Sentence-nal particle', ['', '', '', '', 'SB'], # ?
|
||||||
|
'Particle, other', ['', '', '', '', 'MSP'], # ?
|
||||||
|
'Before VP', ['', '', '', '', 'DEV'], # ?
|
||||||
|
'Verb, ? as main verb', ['', '', '', '', 'VE'], # ?
|
||||||
|
'Verb, ????', ['', '', '', '', 'VC'] # ?
|
||||||
|
]}
|
|
@ -0,0 +1,71 @@
|
||||||
|
{cat_to_category: {
|
||||||
|
'ADJ' => 'adjective',
|
||||||
|
'ADV' => 'adverb',
|
||||||
|
'CONJ' => 'conjunction',
|
||||||
|
'COOD' => 'conjunction',
|
||||||
|
'C' => 'complementizer',
|
||||||
|
'D' => 'determiner',
|
||||||
|
'N' => 'noun',
|
||||||
|
'P' => 'preposition',
|
||||||
|
'PN' => 'punctuation',
|
||||||
|
'SC' => 'conjunction',
|
||||||
|
'V' => 'verb',
|
||||||
|
'PRT' => 'particle'
|
||||||
|
},
|
||||||
|
cat_to_description: [
|
||||||
|
['ADJ', 'Adjective'],
|
||||||
|
['ADV', 'Adverb'],
|
||||||
|
['CONJ', 'Coordination conjunction'],
|
||||||
|
['C', 'Complementizer'],
|
||||||
|
['D', 'Determiner'],
|
||||||
|
['N', 'Noun'],
|
||||||
|
['P', 'Preposition'],
|
||||||
|
['SC', 'Subordination conjunction'],
|
||||||
|
['V', 'Verb'],
|
||||||
|
['COOD', 'Part of coordination'],
|
||||||
|
['PN', 'Punctuation'],
|
||||||
|
['PRT', 'Particle'],
|
||||||
|
['S', 'Sentence']
|
||||||
|
],
|
||||||
|
xcat_to_description: [
|
||||||
|
['COOD', 'Coordinated phrase/clause'],
|
||||||
|
['IMP', 'Imperative sentence'],
|
||||||
|
['INV', 'Subject-verb inversion'],
|
||||||
|
['Q', 'Interrogative sentence with subject-verb inversion'],
|
||||||
|
['REL', 'A relativizer included'],
|
||||||
|
['FREL', 'A free relative included'],
|
||||||
|
['TRACE', 'A trace included'],
|
||||||
|
['WH', 'A wh-question word included']
|
||||||
|
],
|
||||||
|
xcat_to_ptb: [
|
||||||
|
['ADJP', '', 'ADJP'],
|
||||||
|
['ADJP', 'REL', 'WHADJP'],
|
||||||
|
['ADJP', 'FREL', 'WHADJP'],
|
||||||
|
['ADJP', 'WH', 'WHADJP'],
|
||||||
|
['ADVP', '', 'ADVP'],
|
||||||
|
['ADVP', 'REL', 'WHADVP'],
|
||||||
|
['ADVP', 'FREL', 'WHADVP'],
|
||||||
|
['ADVP', 'WH', 'WHADVP'],
|
||||||
|
['CONJP', '', 'CONJP'],
|
||||||
|
['CP', '', 'SBAR'],
|
||||||
|
['DP', '', 'NP'],
|
||||||
|
['NP', '', 'NP'],
|
||||||
|
['NX', 'NX', 'NAC'],
|
||||||
|
['NP' 'REL' 'WHNP'],
|
||||||
|
['NP' 'FREL' 'WHNP'],
|
||||||
|
['NP' 'WH' 'WHNP'],
|
||||||
|
['PP', '', 'PP'],
|
||||||
|
['PP', 'REL', 'WHPP'],
|
||||||
|
['PP', 'WH', 'WHPP'],
|
||||||
|
['PRT', '', 'PRT'],
|
||||||
|
['S', '', 'S'],
|
||||||
|
['S', 'INV', 'SINV'],
|
||||||
|
['S', 'Q', 'SQ'],
|
||||||
|
['S', 'REL', 'SBAR'],
|
||||||
|
['S', 'FREL', 'SBAR'],
|
||||||
|
['S', 'WH', 'SBARQ'],
|
||||||
|
['SCP', '', 'SBAR'],
|
||||||
|
['VP', '', 'VP'],
|
||||||
|
['VP', '', 'VP'],
|
||||||
|
['', '', 'UK']
|
||||||
|
]}
|
|
@ -0,0 +1,17 @@
{tag_to_category: {
  'C' => :complementizer,
  'PN' => :punctuation,
  'SC' => :conjunction
  }
# Paris7 Treebank functional tags
=begin
SUJ (subject)
OBJ (direct object)
ATS (predicative complement of a subject)
ATO (predicative complement of a direct object)
MOD (modifier or adjunct)
A-OBJ (indirect complement introduced by à)
DE-OBJ (indirect complement introduced by de)
P-OBJ (indirect complement introduced by another preposition)
=end
}
@ -0,0 +1,15 @@
{escape_characters: {
  '(' => '-LRB-',
  ')' => '-RRB-',
  '[' => '-LSB-',
  ']' => '-RSB-',
  '{' => '-LCB-',
  '}' => '-RCB-'
  },
phrase_tag_to_description: [
  ['S', 'Paris7 declarative clause'],
  ['SBAR', 'Clause introduced by a (possibly empty) subordinating conjunction'],
  ['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
  ['SINV', 'Inverted declarative sentence'],
  ['SQ', 'Inverted yes/no question']
]}
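A short sketch of what the escape_characters map is for: bracket characters are rewritten to their PTB placeholders before text is handed to PTB-trained tools, and mapped back afterwards.

    escape = {'(' => '-LRB-', ')' => '-RRB-', '[' => '-LSB-',
              ']' => '-RSB-', '{' => '-LCB-', '}' => '-RCB-'}
    'f(x)'.gsub(/[(){}\[\]]/) { |c| escape[c] }   # => "f-LRB-x-RRB-"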
@ -6,7 +6,7 @@
   },
   time: {
     type: :annotator,
-    targets: [:group]
+    targets: [:phrase]
   },
   topics: {
     type: :annotator,

@ -22,18 +22,18 @@
   },
   name_tag: {
     type: :annotator,
-    targets: [:group]
+    targets: [:phrase, :word]
+  },
+  coreferences: {
+    type: :annotator,
+    targets: [:zone]
   },
   tf_idf: {
     type: :annotator,
     targets: [:word]
   },
-  similarity: {
-    type: :computer,
-    targets: [:entity]
-  },
-  distance: {
-    type: :computer,
-    targets: [:entity]
+  summary: {
+    type: :annotator,
+    targets: [:document]
   }
 }
@ -1,12 +1,11 @@
 {
   taggers: {
     type: :annotator,
-    targets: [:group, :token],
-    recursive: true
+    targets: [:phrase, :token]
   },
   categorizers: {
     type: :annotator,
-    targets: [:group, :token],
+    targets: [:phrase, :token],
     recursive: true
   },
   sensers: {

@ -15,5 +14,5 @@
     preset_option: :nym,
     presets: [:synonyms, :antonyms,
       :hyponyms, :hypernyms],
   }
 }
@ -0,0 +1 @@
[:extractors, :inflectors, :formatters, :learners, :lexicalizers, :processors, :retrievers]
@ -1,7 +1,7 @@
 {
   chunkers: {
     type: :transformer,
-    targets: [:document, :section],
+    targets: [:document],
     default: :autoselect
   },
   segmenters: {

@ -10,10 +10,10 @@
   },
   tokenizers: {
     type: :transformer,
-    targets: [:group]
+    targets: [:sentence, :phrase]
   },
   parsers: {
     type: :transformer,
-    targets: [:group]
+    targets: [:sentence, :phrase]
   }
 }
@ -0,0 +1,5 @@
# Contains the core classes used by Treat.
module Treat::Core
  p = Treat.paths.lib + 'treat/core/*.rb'
  Dir.glob(p).each { |f| require f }
end
@ -2,7 +2,7 @@
|
||||||
# problem as well as data for entities that
|
# problem as well as data for entities that
|
||||||
# have already been classified, complete with
|
# have already been classified, complete with
|
||||||
# references to these entities.
|
# references to these entities.
|
||||||
class Treat::Learning::DataSet
|
class Treat::Core::DataSet
|
||||||
|
|
||||||
# The classification problem this
|
# The classification problem this
|
||||||
# data set holds data for.
|
# data set holds data for.
|
||||||
|
@ -11,31 +11,16 @@ class Treat::Learning::DataSet
|
||||||
# classified (training data).
|
# classified (training data).
|
||||||
attr_accessor :items
|
attr_accessor :items
|
||||||
|
|
||||||
# Initialize the DataSet.
|
# Initialize the DataSet. Can be
|
||||||
|
# done with a Problem entity
|
||||||
|
# (thereby creating an empty set)
|
||||||
|
# or with a filename (representing
|
||||||
|
# a serialized data set which will
|
||||||
|
# then be deserialized and loaded).
|
||||||
def initialize(problem)
|
def initialize(problem)
|
||||||
unless problem.is_a?(Treat::Learning::Problem)
|
|
||||||
raise Treat::Exception, "The first argument " +
|
|
||||||
"to initialize should be an instance of " +
|
|
||||||
"Treat::Learning::Problem."
|
|
||||||
end
|
|
||||||
@problem, @items = problem, []
|
@problem, @items = problem, []
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.build(from)
|
|
||||||
if from.is_a?(Hash)
|
|
||||||
Treat::Learning::DataSet.unserialize(
|
|
||||||
Treat.databases.default.adapter, from)
|
|
||||||
elsif from.is_a?(String)
|
|
||||||
unless File.readable?(from)
|
|
||||||
raise Treat::Exception,
|
|
||||||
"Attempting to initialize data set from " +
|
|
||||||
"file '#{from}', but it is not readable."
|
|
||||||
end
|
|
||||||
Treat::Learning::DataSet.unserialize(
|
|
||||||
File.extname(from)[1..-1], file: from)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Add an entity to the data set. The
|
# Add an entity to the data set. The
|
||||||
# entity's relevant features are
|
# entity's relevant features are
|
||||||
# calculated based on the classification
|
# calculated based on the classification
|
||||||
|
@ -43,9 +28,8 @@ class Treat::Learning::DataSet
|
||||||
# of the calculation is added to the
|
# of the calculation is added to the
|
||||||
# data set, along with the ID of the entity.
|
# data set, along with the ID of the entity.
|
||||||
def <<(entity)
|
def <<(entity)
|
||||||
@items << {
|
@items << { tags: @problem.
|
||||||
tags: (!@problem.tags.empty? ?
|
export_tags(entity),
|
||||||
@problem.export_tags(entity) : []),
|
|
||||||
features: @problem.
|
features: @problem.
|
||||||
export_features(entity),
|
export_features(entity),
|
||||||
id: entity.id }
|
id: entity.id }
|
||||||
|
@ -92,7 +76,7 @@ class Treat::Learning::DataSet
|
||||||
next unless tag.proc_string
|
next unless tag.proc_string
|
||||||
tag.proc = eval(tag.proc_string)
|
tag.proc = eval(tag.proc_string)
|
||||||
end
|
end
|
||||||
data_set = Treat::Learning::DataSet.new(problem)
|
data_set = Treat::Core::DataSet.new(problem)
|
||||||
data_set.items = items
|
data_set.items = items
|
||||||
data_set
|
data_set
|
||||||
end
|
end
|
||||||
|
@ -131,7 +115,7 @@ class Treat::Learning::DataSet
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"Couldn't retrieve problem ID #{options[:problem]}."
|
"Couldn't retrieve problem ID #{options[:problem]}."
|
||||||
end
|
end
|
||||||
problem = Treat::Learning::Problem.from_hash(p_record)
|
problem = Treat::Core::Problem.from_hash(p_record)
|
||||||
data = database.collection('data').find(options).to_a
|
data = database.collection('data').find(options).to_a
|
||||||
items = []
|
items = []
|
||||||
data.each do |datum|
|
data.each do |datum|
|
||||||
|
@ -142,7 +126,7 @@ class Treat::Learning::DataSet
|
||||||
item[:id] = datum['id']
|
item[:id] = datum['id']
|
||||||
items << item
|
items << item
|
||||||
end
|
end
|
||||||
data_set = Treat::Learning::DataSet.new(problem)
|
data_set = Treat::Core::DataSet.new(problem)
|
||||||
data_set.items = items
|
data_set.items = items
|
||||||
data_set
|
data_set
|
||||||
end
|
end
|
||||||
|
@ -150,18 +134,13 @@ class Treat::Learning::DataSet
|
||||||
# Merge another data set into this one.
|
# Merge another data set into this one.
|
||||||
def merge(data_set)
|
def merge(data_set)
|
||||||
if data_set.problem != @problem
|
if data_set.problem != @problem
|
||||||
raise Treat::Exception,
|
raise Treat::Exception, # FIXME
|
||||||
"Cannot merge two data sets that " +
|
"Cannot merge two data sets that " +
|
||||||
"don't reference the same problem."
|
"don't reference the same problem."
|
||||||
else
|
else
|
||||||
@items += data_set.items
|
@items << data_set.items
|
||||||
|
@entities << data_set.entities
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Compare with other data set.
|
|
||||||
def ==(data_set)
|
|
||||||
@problem == data_set.problem &&
|
|
||||||
@items == data_set.items
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
|
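The hunks above rename Treat::Learning::DataSet back to Treat::Core::DataSet and drop the build/validation helpers. A hedged usage sketch of the resulting class, using only the calls visible in this diff; problem, question and feature construction details are elided:

    problem  = Treat::Core::Problem.new(question, feature)  # question/feature built elsewhere
    data_set = Treat::Core::DataSet.new(problem)

    data_set << sentence        # stores the entity's exported tags, features and id
    data_set.merge(other_set)   # raises unless both sets reference the same problem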
@ -1,23 +0,0 @@
module Treat::Core::DSL

  # Map all classes in Treat::Entities to
  # a global builder function (entity, word,
  # phrase, punctuation, symbol, list, etc.)
  def self.included(base)
    def method_missing(sym, *args, &block)
      @@entities ||= Treat.core.entities.list
      @@learning ||= Treat.core.learning.list
      if @@entities.include?(sym)
        klass = Treat::Entities.const_get(sym.cc)
        return klass.build(*args)
      elsif @@learning.include?(sym)
        klass = Treat::Learning.const_get(sym.cc)
        return klass.new(*args)
      else
        super(sym, *args, &block)
        raise "Uncaught method ended up in Treat DSL."
      end
    end
  end

end
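Sketch of what the removed DSL provided, assuming :word and :phrase are among Treat.core.entities.list: any lower-cased entity name became a global builder method.

    include Treat::Core::DSL

    word('dog')            # => Treat::Entities::Word.build('dog')
    phrase('the', 'dog')   # => Treat::Entities::Phrase.build('the', 'dog')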
@ -0,0 +1,39 @@
# Represents a feature to be used
# in a classification task.
class Treat::Core::Export

  # The name of the feature. If no
  # proc is supplied, this assumes
  # that the target of your classification
  # problem responds to the method
  # corresponding to this name.
  attr_reader :name
  # The feature's default value, if nil.
  attr_reader :default
  # A proc that can be used to perform
  # calculations before storing a feature.
  attr_accessor :proc
  # The proc as a string value.
  attr_accessor :proc_string

  require 'treat/core/hashable'
  include Treat::Core::Hashable

  # Initialize a feature for a classification problem.
  def initialize(name, default = nil, proc_string = nil)
    @name, @default, @proc_string =
      name, default, proc_string
    @proc = proc_string ? eval(proc_string) : nil
  end

  # Custom comparison operator for features.
  def ==(feature)
    @name == feature.name &&
    @proc == feature.proc &&
    @default == feature.default
  end

end

class Treat::Core::Feature < Treat::Core::Export; end
class Treat::Core::Tag < Treat::Core::Export; end
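Usage sketch for the export classes above. The proc string is eval'ed once at construction time; the convention that the proc receives the entity being exported is an assumption based on Problem#export further down, and the :starts_upper feature is made up for illustration:

    wc = Treat::Core::Feature.new(:word_count)          # proc-less: relies on entity#word_count
    up = Treat::Core::Feature.new(:starts_upper, false,
           '->(e) { !!(e.to_s =~ /\A[A-Z]/) }')         # hypothetical feature with a proc string
    up.proc.call(some_entity)                           # => true / false

    Treat::Core::Feature.new(:word_count) ==
      Treat::Core::Feature.new(:word_count)             # => true (name, proc and default all match)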
@ -0,0 +1,12 @@
module Treat::Core::Hashable

  def to_hash
    hash = {}
    instance_variables.each do |var|
      val = instance_variable_get(var)
      hash[var.to_s.delete("@")] = val
    end
    hash
  end

end
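to_hash simply dumps instance variables keyed by name, which is what the serialization code relies on. For the Feature class just defined:

    Treat::Core::Feature.new(:word_count, 0).to_hash
    # => {"name"=>:word_count, "default"=>0, "proc_string"=>nil, "proc"=>nil}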
@ -2,7 +2,7 @@
|
||||||
# - What question are we trying to answer?
|
# - What question are we trying to answer?
|
||||||
# - What features are we going to look at
|
# - What features are we going to look at
|
||||||
# to attempt to answer that question?
|
# to attempt to answer that question?
|
||||||
class Treat::Learning::Problem
|
class Treat::Core::Problem
|
||||||
|
|
||||||
# A unique identifier for the problem.
|
# A unique identifier for the problem.
|
||||||
attr_accessor :id
|
attr_accessor :id
|
||||||
|
@ -18,38 +18,32 @@ class Treat::Learning::Problem
|
||||||
attr_reader :tag_labels
|
attr_reader :tag_labels
|
||||||
|
|
||||||
# Initialize the problem with a question
|
# Initialize the problem with a question
|
||||||
# and an arbitrary number of features. # FIXME: init with id!?
|
# and an arbitrary number of features.
|
||||||
def initialize(question, *exports)
|
def initialize(question, *exports)
|
||||||
unless question.is_a?(Treat::Learning::Question)
|
unless question.is_a?(Treat::Core::Question)
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"The first argument to initialize " +
|
"The first argument to initialize " +
|
||||||
"should be an instance of " +
|
"should be an instance of " +
|
||||||
"Treat::Learning::Question."
|
"Treat::Core::Question."
|
||||||
end
|
end
|
||||||
if exports.any? { |f| !f.is_a?(Treat::Learning::Export) }
|
if exports.any? { |f| !f.is_a?(Treat::Core::Export) }
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"The second argument and all subsequent ones " +
|
"The second argument and all subsequent ones " +
|
||||||
"to initialize should be instances of subclasses " +
|
"to initialize should be instances of subclasses " +
|
||||||
"of Treat::Learning::Export."
|
"of Treat::Core::Export."
|
||||||
end
|
end
|
||||||
@question, @id = question, object_id
|
@question, @id = question, object_id
|
||||||
@features = exports.select do |exp|
|
@features = exports.select do |exp|
|
||||||
exp.is_a?(Treat::Learning::Feature)
|
exp.is_a?(Treat::Core::Feature)
|
||||||
end
|
|
||||||
if @features.size == 0
|
|
||||||
raise Treat::Exception,
|
|
||||||
"Problem should be supplied with at least "+
|
|
||||||
"one feature to work with."
|
|
||||||
end
|
end
|
||||||
@tags = exports.select do |exp|
|
@tags = exports.select do |exp|
|
||||||
exp.is_a?(Treat::Learning::Tag)
|
exp.is_a?(Treat::Core::Tag)
|
||||||
end
|
end
|
||||||
@feature_labels = @features.map { |f| f.name }
|
@feature_labels = @features.map { |f| f.name }
|
||||||
@tag_labels = @tags.map { |t| t.name }
|
@tag_labels = @tags.map { |t| t.name }
|
||||||
end
|
end
|
||||||
|
|
||||||
# Custom comparison for problems.
|
# Custom comparison for problems.
|
||||||
# Should we check for ID here ? FIXME
|
|
||||||
def ==(problem)
|
def ==(problem)
|
||||||
@question == problem.question &&
|
@question == problem.question &&
|
||||||
@features == problem.features &&
|
@features == problem.features &&
|
||||||
|
@ -63,29 +57,15 @@ class Treat::Learning::Problem
|
||||||
# all of the features.
|
# all of the features.
|
||||||
def export_features(e, include_answer = true)
|
def export_features(e, include_answer = true)
|
||||||
features = export(e, @features)
|
features = export(e, @features)
|
||||||
return features if !include_answer
|
return features unless include_answer
|
||||||
features << (e.has?(@question.name) ?
|
features << (e.has?(@question.name) ?
|
||||||
e.get(@question.name) : @question.default)
|
e.get(@question.name) : @question.default)
|
||||||
features
|
features
|
||||||
end
|
end
|
||||||
|
|
||||||
def export_tags(entity)
|
def export_tags(e); export(e, @tags); end
|
||||||
if @tags.empty?
|
|
||||||
raise Treat::Exception,
|
|
||||||
"Cannot export the tags, because " +
|
|
||||||
"this problem doesn't have any."
|
|
||||||
end
|
|
||||||
export(entity, @tags)
|
|
||||||
end
|
|
||||||
|
|
||||||
def export(entity, exports)
|
def export(entity, exports)
|
||||||
unless @question.target == entity.type
|
|
||||||
targ, type = @question.target, entity.type
|
|
||||||
raise Treat::Exception,
|
|
||||||
"This classification problem targets " +
|
|
||||||
"#{targ}s, but a(n) #{type} " +
|
|
||||||
"was passed to export instead."
|
|
||||||
end
|
|
||||||
ret = []
|
ret = []
|
||||||
exports.each do |export|
|
exports.each do |export|
|
||||||
r = export.proc ?
|
r = export.proc ?
|
||||||
|
@ -97,44 +77,35 @@ class Treat::Learning::Problem
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_hash
|
def to_hash
|
||||||
{'question' => object_to_hash(@question),
|
{'question' => @question.to_hash,
|
||||||
'features' => @features.map { |f|
|
'features' => @features.map { |f|
|
||||||
object_to_hash(f.tap { |f| f.proc = nil }) },
|
f.tap { |f| f.proc = nil }.to_hash },
|
||||||
'tags' => @tags.map { |t|
|
'tags' => @tags.map { |t|
|
||||||
object_to_hash(t.tap { |t| t.proc = nil }) },
|
t.tap { |t| t.proc = nil }.to_hash },
|
||||||
'id' => @id }
|
'id' => @id }
|
||||||
end
|
end
|
||||||
|
|
||||||
def object_to_hash(obj)
|
|
||||||
hash = {}
|
|
||||||
obj.instance_variables.each do |var|
|
|
||||||
val = obj.instance_variable_get(var)
|
|
||||||
hash[var.to_s.delete("@")] = val
|
|
||||||
end
|
|
||||||
hash
|
|
||||||
end
|
|
||||||
|
|
||||||
def self.from_hash(hash)
|
def self.from_hash(hash)
|
||||||
question = Treat::Learning::Question.new(
|
question = Treat::Core::Question.new(
|
||||||
hash['question']['name'],
|
hash['question']['name'],
|
||||||
hash['question']['target'],
|
hash['question']['target'],
|
||||||
hash['question']['default'],
|
hash['question']['type'],
|
||||||
hash['question']['type']
|
hash['question']['default']
|
||||||
)
|
)
|
||||||
features = []
|
features = []
|
||||||
hash['features'].each do |feature|
|
hash['features'].each do |feature|
|
||||||
features << Treat::Learning::Feature.new(
|
features << Treat::Core::Feature.new(
|
||||||
feature['name'], feature['default'],
|
feature['name'], feature['default'],
|
||||||
feature['proc_string'])
|
feature['proc_string'])
|
||||||
end
|
end
|
||||||
tags = []
|
tags = []
|
||||||
hash['tags'].each do |tag|
|
hash['tags'].each do |tag|
|
||||||
tags << Treat::Learning::Tag.new(
|
tags << Treat::Core::Tag.new(
|
||||||
tag['name'], tag['default'],
|
tag['name'], tag['default'],
|
||||||
tag['proc_string'])
|
tag['proc_string'])
|
||||||
end
|
end
|
||||||
features_and_tags = features + tags
|
features_and_tags = features + tags
|
||||||
p = Treat::Learning::Problem.new(question, *features_and_tags)
|
p = Treat::Core::Problem.new(question, *features_and_tags)
|
||||||
p.id = hash['id']
|
p.id = hash['id']
|
||||||
p
|
p
|
||||||
end
|
end
|
|
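A minimal sketch of wiring these pieces together, using the mongo-data-sets signatures shown above (on master the same classes live under Treat::Learning, Question#initialize takes default before type, and the inputs are validated; all names and the proc string are illustrative):

    # A discrete question asked about every sentence:
    question = Treat::Core::Question.new(:is_key_sentence, :sentence, :discrete, false)

    # A feature whose proc is eval'ed from the stored string:
    feature = Treat::Core::Feature.new(:word_count, 0, "->(e) { e.word_count }")

    problem = Treat::Core::Problem.new(question, feature)
    problem.feature_labels   # => [:word_count]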
@ -1,6 +1,9 @@
|
||||||
# Defines a question to answer in the
|
# Defines a question to answer in the
|
||||||
# context of a classification problem.
|
# context of a classification problem.
|
||||||
class Treat::Learning::Question
|
class Treat::Core::Question
|
||||||
|
|
||||||
|
require 'treat/core/hashable'
|
||||||
|
include Treat::Core::Hashable
|
||||||
|
|
||||||
# Defines an arbitrary label for the
|
# Defines an arbitrary label for the
|
||||||
# question we are trying to answer
|
# question we are trying to answer
|
||||||
|
@ -8,32 +11,20 @@ class Treat::Learning::Question
|
||||||
# also be used as the annotation name
|
# also be used as the annotation name
|
||||||
# for the answer to the question.
|
# for the answer to the question.
|
||||||
attr_reader :name
|
attr_reader :name
|
||||||
# Defines the target of the question
|
|
||||||
# (e.g. :sentence, :paragraph, etc.)
|
|
||||||
attr_reader :target
|
|
||||||
# Can be :continuous or :discrete,
|
# Can be :continuous or :discrete,
|
||||||
# depending on the features used.
|
# depending on the features used.
|
||||||
attr_reader :type
|
attr_reader :type
|
||||||
|
# Defines the target of the question
|
||||||
|
# (e.g. :sentence, :paragraph, etc.)
|
||||||
|
attr_reader :target
|
||||||
# Default for the answer to the question.
|
# Default for the answer to the question.
|
||||||
attr_reader :default
|
attr_reader :default
|
||||||
|
|
||||||
# Initialize the question.
|
# Initialize the question.
|
||||||
def initialize(name, target, default = nil, type = :continuous)
|
def initialize(name, target,
|
||||||
unless name.is_a?(Symbol)
|
type = :continuous, default = nil)
|
||||||
raise Treat::Exception,
|
@name, @target = name, target
|
||||||
"Question name should be a symbol."
|
@type, @default = type, default
|
||||||
end
|
|
||||||
unless Treat.core.entities.list.include?(target)
|
|
||||||
raise Treat::Exception, "Target type should be " +
|
|
||||||
"a symbol and should be one of the following: " +
|
|
||||||
Treat.core.entities.list.inspect
|
|
||||||
end
|
|
||||||
unless [:continuous, :discrete].include?(type)
|
|
||||||
raise Treat::Exception, "Type should be " +
|
|
||||||
"continuous or discrete."
|
|
||||||
end
|
|
||||||
@name, @target, @type, @default =
|
|
||||||
name, target, type, default
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Custom comparison operator for questions.
|
# Custom comparison operator for questions.
|
|
@ -1,41 +0,0 @@
|
||||||
class Treat::Core::Server
|
|
||||||
|
|
||||||
# Refer to http://rack.rubyforge.org/doc/classes/Rack/Server.html
|
|
||||||
# for possible options to configure.
|
|
||||||
def initialize(handler = 'thin', options = {})
|
|
||||||
raise "Implementation not finished."
|
|
||||||
require 'json'; require 'rack'
|
|
||||||
@handler, @options = handler.capitalize, options
|
|
||||||
end
|
|
||||||
|
|
||||||
def start
|
|
||||||
handler = Rack::Handler.const_get(@handler)
|
|
||||||
handler.run(self, @options)
|
|
||||||
end
|
|
||||||
|
|
||||||
def call(env)
|
|
||||||
headers = { 'content-type' => 'application/json' }
|
|
||||||
rack_input = env["rack.input"].read
|
|
||||||
if rack_input.strip == ''
|
|
||||||
return [500, headers, {
|
|
||||||
'error' => 'Empty JSON request.'
|
|
||||||
}]
|
|
||||||
end
|
|
||||||
rack_json = JSON.parse(rack_input)
|
|
||||||
unless rack_json['type'] &&
|
|
||||||
rack_json['value'] && rack_json['do']
|
|
||||||
return [500, headers, {
|
|
||||||
'error' => 'Must specify "type", "value" and "do".'
|
|
||||||
}]
|
|
||||||
end
|
|
||||||
if rack_json['conf']
|
|
||||||
# Set the configuration.
|
|
||||||
end
|
|
||||||
method = rack_json['type'].capitalize.intern
|
|
||||||
resp = send(method, rack_json[value]).do(rack_json['do'])
|
|
||||||
|
|
||||||
response = [rack_input.to_json]
|
|
||||||
[200, headers, response]
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
|
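The removed server stub above targets Rack's call(env) contract, which expects a status, a headers hash and a body object that responds to #each. For orientation, a minimal compliant JSON endpoint could look like this sketch (not part of Treat; the handler and port are illustrative):

    require 'json'
    require 'rack'

    app = lambda do |env|
      payload = { 'status' => 'ok' }
      [200, { 'content-type' => 'application/json' }, [payload.to_json]]
    end

    # Rack::Handler::Thin.run(app, Port: 9292)   # assuming thin is installed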
@ -0,0 +1,6 @@
|
||||||
|
# Contains the textual model used by Treat.
|
||||||
|
module Treat::Entities
|
||||||
|
require 'treat/entities/entity'
|
||||||
|
p = Treat.paths.lib + 'treat/entities/*.rb'
|
||||||
|
Dir.glob(p).each { |f| require f }
|
||||||
|
end
|
|
@ -3,7 +3,7 @@
|
||||||
# a string or a numeric object. This class
|
# a string or a numeric object. This class
|
||||||
# is pretty much self-explanatory.
|
# is pretty much self-explanatory.
|
||||||
# FIXME how can we make this language independent?
|
# FIXME how can we make this language independent?
|
||||||
module Treat::Entities::Entity::Buildable
|
module Treat::Entities::Abilities::Buildable
|
||||||
|
|
||||||
require 'schiphol'
|
require 'schiphol'
|
||||||
require 'fileutils'
|
require 'fileutils'
|
||||||
|
@ -15,21 +15,7 @@ module Treat::Entities::Entity::Buildable
|
||||||
PunctRegexp = /^[[:punct:]\$]+$/
|
PunctRegexp = /^[[:punct:]\$]+$/
|
||||||
UriRegexp = /^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix
|
UriRegexp = /^(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(([0-9]{1,5})?\/.*)?$/ix
|
||||||
EmailRegexp = /.+\@.+\..+/
|
EmailRegexp = /.+\@.+\..+/
|
||||||
Enclitics = [
|
Enclitics = %w['ll 'm 're 's 't 've]
|
||||||
# EXAMPLE:
|
|
||||||
"'d", # I'd => I would
|
|
||||||
"'ll", # I'll => I will
|
|
||||||
"'m", # I'm => I am
|
|
||||||
"'re", # We're => We are
|
|
||||||
"'s", # There's => There is
|
|
||||||
# Let's => Let us
|
|
||||||
"'t", # 'Twas => Archaic ('Twas the night)
|
|
||||||
"'ve", # They've => They have
|
|
||||||
"n't" # Can't => Can not
|
|
||||||
]
|
|
||||||
|
|
||||||
# Accepted formats of serialized files
|
|
||||||
AcceptedFormats = ['.xml', '.yml', '.yaml', '.mongo']
|
|
||||||
|
|
||||||
# Reserved folder names
|
# Reserved folder names
|
||||||
Reserved = ['.index']
|
Reserved = ['.index']
|
||||||
|
@ -37,38 +23,23 @@ module Treat::Entities::Entity::Buildable
|
||||||
# Build an entity from anything (can be
|
# Build an entity from anything (can be
|
||||||
# a string, numeric, folder, or file name
|
# a string, numeric, folder, or file name
|
||||||
# representing a raw or serialized file).
|
# representing a raw or serialized file).
|
||||||
def build(*args)
|
def build(file_or_value, options = {})
|
||||||
|
|
||||||
# This probably needs some doc.
|
|
||||||
if args.size == 0
|
|
||||||
file_or_value = ''
|
|
||||||
elsif args[0].is_a?(Hash)
|
|
||||||
file_or_value = args[0]
|
|
||||||
elsif args.size == 1
|
|
||||||
if args[0].is_a?(Treat::Entities::Entity)
|
|
||||||
args[0] = [args[0]]
|
|
||||||
end
|
|
||||||
file_or_value = args[0]
|
|
||||||
else
|
|
||||||
file_or_value = args
|
|
||||||
end
|
|
||||||
|
|
||||||
fv = file_or_value.to_s
|
fv = file_or_value.to_s
|
||||||
|
|
||||||
if fv == ''; self.new
|
if file_or_value.is_a?(Hash)
|
||||||
elsif file_or_value.is_a?(Array)
|
|
||||||
from_array(file_or_value)
|
|
||||||
elsif file_or_value.is_a?(Hash)
|
|
||||||
from_db(file_or_value)
|
from_db(file_or_value)
|
||||||
elsif self == Treat::Entities::Document || (is_serialized_file?(fv))
|
elsif self == Treat::Entities::Document ||
|
||||||
|
(fv.index('yml') || fv.index('yaml') ||
|
||||||
|
fv.index('xml') || fv.index('mongo'))
|
||||||
if fv =~ UriRegexp
|
if fv =~ UriRegexp
|
||||||
from_url(fv)
|
from_url(fv, options)
|
||||||
else
|
else
|
||||||
from_file(fv)
|
from_file(fv, options)
|
||||||
end
|
end
|
||||||
elsif self == Treat::Entities::Collection
|
elsif self == Treat::Entities::Collection
|
||||||
if FileTest.directory?(fv)
|
if FileTest.directory?(fv)
|
||||||
from_folder(fv)
|
from_folder(fv, options)
|
||||||
else
|
else
|
||||||
create_collection(fv)
|
create_collection(fv)
|
||||||
end
|
end
|
||||||
|
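In practice the dispatch above lets a single #build call accept very different inputs. A few illustrative calls (the file, URL and folder names are hypothetical):

    Treat::Entities::Entity.build('A short sentence.')        # string => usually a Sentence
    Treat::Entities::Document.build('article.txt')            # raw file, read with the autoselected reader
    Treat::Entities::Document.build('http://www.example.com/page.html')  # URL, downloaded then read
    Treat::Entities::Collection.build('corpus/')               # folder, walked recursively (created if missing)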
@ -92,34 +63,27 @@ module Treat::Entities::Entity::Buildable
|
||||||
# is user-created (i.e. by calling build
|
# is user-created (i.e. by calling build
|
||||||
# instead of from_string directly).
|
# instead of from_string directly).
|
||||||
def from_string(string, enforce_type = false)
|
def from_string(string, enforce_type = false)
|
||||||
# If calling using the build syntax (i.e. user-
|
|
||||||
# called), enforce the type that was supplied.
|
|
||||||
enforce_type = true if caller_method == :build
|
enforce_type = true if caller_method == :build
|
||||||
|
|
||||||
unless self == Treat::Entities::Entity
|
unless self == Treat::Entities::Entity
|
||||||
return self.new(string) if enforce_type
|
return self.new(string) if enforce_type
|
||||||
end
|
end
|
||||||
e = anything_from_string(string)
|
|
||||||
if enforce_type && !e.is_a?(self)
|
|
||||||
raise "Asked to build a #{self.mn.downcase} "+
|
|
||||||
"from \"#{string}\" and to enforce type, "+
|
|
||||||
"but type detected was #{e.class.mn.downcase}."
|
|
||||||
end
|
|
||||||
e
|
|
||||||
end
|
|
||||||
|
|
||||||
# Build a document from an array
|
e = anything_from_string(string)
|
||||||
# of builders.
|
|
||||||
def from_array(array)
|
if enforce_type && !e.is_a?(self)
|
||||||
obj = self.new
|
raise "Asked to build a #{cl(self).downcase} "+
|
||||||
array.each do |el|
|
"from \"#{string}\" and to enforce type, "+
|
||||||
el = el.to_entity unless el.is_a?(Treat::Entities::Entity)
|
"but type detected was #{cl(e.class).downcase}."
|
||||||
obj << el
|
|
||||||
end
|
end
|
||||||
obj
|
|
||||||
|
e
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Build a document from a URL.
|
# Build a document from a URL.
|
||||||
def from_url(url)
|
def from_url(url, options)
|
||||||
unless self ==
|
unless self ==
|
||||||
Treat::Entities::Document
|
Treat::Entities::Document
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
|
@ -127,22 +91,16 @@ module Treat::Entities::Entity::Buildable
|
||||||
'else than a document from a url.'
|
'else than a document from a url.'
|
||||||
end
|
end
|
||||||
|
|
||||||
begin
|
f = Schiphol.download(url,
|
||||||
folder = Treat.paths.files
|
:download_folder => Treat.paths.files,
|
||||||
if folder[-1] == '/'
|
:show_progress => Treat.core.verbosity.silence,
|
||||||
folder = folder[0..-2]
|
:rectify_extensions => true,
|
||||||
end
|
:max_tries => 3
|
||||||
f = Schiphol.download(url,
|
)
|
||||||
download_folder: folder,
|
|
||||||
show_progress: !Treat.core.verbosity.silence,
|
|
||||||
rectify_extensions: true,
|
|
||||||
max_tries: 3)
|
|
||||||
rescue
|
|
||||||
raise Treat::Exception,
|
|
||||||
"Couldn't download file at #{url}."
|
|
||||||
end
|
|
||||||
|
|
||||||
e = from_file(f,'html')
|
options[:default_to] ||= 'html'
|
||||||
|
|
||||||
|
e = from_file(f, options)
|
||||||
e.set :url, url.to_s
|
e.set :url, url.to_s
|
||||||
e
|
e
|
||||||
|
|
||||||
|
@ -165,7 +123,7 @@ module Treat::Entities::Entity::Buildable
|
||||||
|
|
||||||
# Build an entity from a folder with documents.
|
# Build an entity from a folder with documents.
|
||||||
# Folders will be searched recursively.
|
# Folders will be searched recursively.
|
||||||
def from_folder(folder)
|
def from_folder(folder, options)
|
||||||
|
|
||||||
return if Reserved.include?(folder)
|
return if Reserved.include?(folder)
|
||||||
|
|
||||||
|
@ -191,43 +149,39 @@ module Treat::Entities::Entity::Buildable
|
||||||
c = Treat::Entities::Collection.new(folder)
|
c = Treat::Entities::Collection.new(folder)
|
||||||
folder += '/' unless folder[-1] == '/'
|
folder += '/' unless folder[-1] == '/'
|
||||||
|
|
||||||
if !FileTest.directory?(folder)
|
|
||||||
FileUtils.mkdir(folder)
|
|
||||||
end
|
|
||||||
|
|
||||||
c.set :folder, folder
|
|
||||||
i = folder + '/.index'
|
|
||||||
c.set :index, i if FileTest.directory?(i)
|
|
||||||
|
|
||||||
Dir[folder + '*'].each do |f|
|
Dir[folder + '*'].each do |f|
|
||||||
if FileTest.directory?(f)
|
if FileTest.directory?(f)
|
||||||
c2 = Treat::Entities::Collection.
|
c2 = Treat::Entities::Collection.
|
||||||
from_folder(f)
|
from_folder(f, options)
|
||||||
c.<<(c2, false) if c2
|
c.<<(c2, false) if c2
|
||||||
else
|
else
|
||||||
c.<<(Treat::Entities::Document.
|
c.<<(Treat::Entities::Document.
|
||||||
from_file(f), false)
|
from_file(f, options), false)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
c
|
||||||
return c
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Build a document from a raw or serialized file.
|
# Build a document from a raw or serialized file.
|
||||||
def from_file(file,def_fmt=nil)
|
def from_file(file, options)
|
||||||
|
|
||||||
if is_serialized_file?(file)
|
if file.index('yml') ||
|
||||||
from_serialized_file(file)
|
file.index('yaml') ||
|
||||||
|
file.index('xml') ||
|
||||||
|
file.index('mongo')
|
||||||
|
from_serialized_file(file, options)
|
||||||
else
|
else
|
||||||
fmt = Treat::Workers::Formatters::Readers::Autoselect.detect_format(file,def_fmt)
|
fmt = Treat::Workers::Formatters::Readers::Autoselect.
|
||||||
from_raw_file(file, fmt)
|
detect_format(file, options[:default_to])
|
||||||
|
options[:_format] = fmt
|
||||||
|
from_raw_file(file, options)
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Build a document from a raw file.
|
# Build a document from a raw file.
|
||||||
def from_raw_file(file, def_fmt='txt')
|
def from_raw_file(file, options)
|
||||||
|
|
||||||
unless self ==
|
unless self ==
|
||||||
Treat::Entities::Document
|
Treat::Entities::Document
|
||||||
|
@ -241,40 +195,32 @@ module Treat::Entities::Entity::Buildable
|
||||||
"Path '#{file}' does not "+
|
"Path '#{file}' does not "+
|
||||||
"point to a readable file."
|
"point to a readable file."
|
||||||
end
|
end
|
||||||
options = {default_format: def_fmt}
|
|
||||||
d = Treat::Entities::Document.new
|
d = Treat::Entities::Document.new(file)
|
||||||
d.set :file, file
|
|
||||||
d.read(:autoselect, options)
|
d.read(:autoselect, options)
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Build an entity from a serialized file.
|
# Build an entity from a serialized file.
|
||||||
def from_serialized_file(file)
|
def from_serialized_file(file, options)
|
||||||
|
|
||||||
unless File.readable?(file)
|
if file.index('mongo')
|
||||||
raise Treat::Exception,
|
options[:id] = file.scan( # Consolidate this
|
||||||
"Path '#{file}' does not "+
|
/([0-9]+)\.mongo/).first.first
|
||||||
"point to a readable file."
|
from_db(:mongo, options)
|
||||||
end
|
|
||||||
doc = Treat::Entities::Document.new
|
|
||||||
doc.set :file, file
|
|
||||||
format = nil
|
|
||||||
if File.extname(file) == '.yml' ||
|
|
||||||
File.extname(file) == '.yaml'
|
|
||||||
format = :yaml
|
|
||||||
elsif File.extname(file) == '.xml'
|
|
||||||
format = :xml
|
|
||||||
else
|
else
|
||||||
raise Treat::Exception,
|
unless File.readable?(file)
|
||||||
"Unreadable serialized format for #{file}."
|
raise Treat::Exception,
|
||||||
|
"Path '#{file}' does not "+
|
||||||
|
"point to a readable file."
|
||||||
|
end
|
||||||
|
d = Treat::Entities::Document.new(file)
|
||||||
|
d.unserialize(:autoselect, options)
|
||||||
|
d.children[0].set_as_root! # Fix this
|
||||||
|
d.children[0]
|
||||||
end
|
end
|
||||||
doc.unserialize(format)
|
|
||||||
doc.children[0].set_as_root! # Fix this
|
|
||||||
doc.children[0]
|
|
||||||
end
|
|
||||||
|
|
||||||
def is_serialized_file?(path_to_check)
|
|
||||||
(AcceptedFormats.include? File.extname(path_to_check)) && (File.file?(path_to_check))
|
|
||||||
end
|
end
|
||||||
|
|
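Both branches route serialized inputs through from_serialized_file; the difference is how the format is resolved. A hedged sketch of what a caller sees (file names are hypothetical):

    # master: the extension decides the unserializer (.yml/.yaml or .xml)
    doc = Treat::Entities::Document.build('article.yml')

    # mongo-data-sets: the numeric id is scraped from the *.mongo name and
    # the entity is fetched from the configured Mongo database
    doc = Treat::Entities::Document.build('4242.mongo')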
||||||
def from_db(hash)
|
def from_db(hash)
|
||||||
|
@ -292,28 +238,15 @@ module Treat::Entities::Entity::Buildable
|
||||||
|
|
||||||
# Build any kind of entity from a string.
|
# Build any kind of entity from a string.
|
||||||
def anything_from_string(string)
|
def anything_from_string(string)
|
||||||
case self.mn.downcase.intern
|
|
||||||
when :document
|
|
||||||
folder = Treat.paths.files
|
|
||||||
if folder[-1] == '/'
|
|
||||||
folder = folder[0..-2]
|
|
||||||
end
|
|
||||||
|
|
||||||
now = Time.now.to_f
|
case cl(self).downcase.intern
|
||||||
doc_file = folder+ "/#{now}.txt"
|
when :document, :collection
|
||||||
string.force_encoding('UTF-8')
|
|
||||||
File.open(doc_file, 'w') do |f|
|
|
||||||
f.puts string
|
|
||||||
end
|
|
||||||
|
|
||||||
from_raw_file(doc_file)
|
|
||||||
when :collection
|
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"Cannot create a " +
|
"Cannot create a document or " +
|
||||||
"collection from a string " +
|
"collection from a string " +
|
||||||
"(need a readable file/folder)."
|
"(need a readable file/folder)."
|
||||||
when :phrase
|
when :phrase
|
||||||
group_from_string(string)
|
sentence_or_phrase_from_string(string)
|
||||||
when :token
|
when :token
|
||||||
token_from_string(string)
|
token_from_string(string)
|
||||||
when :zone
|
when :zone
|
||||||
|
@ -325,7 +258,7 @@ module Treat::Entities::Entity::Buildable
|
||||||
if string.gsub(/[\.\!\?]+/,
|
if string.gsub(/[\.\!\?]+/,
|
||||||
'.').count('.') <= 1 &&
|
'.').count('.') <= 1 &&
|
||||||
string.count("\n") == 0
|
string.count("\n") == 0
|
||||||
group_from_string(string)
|
sentence_or_phrase_from_string(string)
|
||||||
else
|
else
|
||||||
zone_from_string(string)
|
zone_from_string(string)
|
||||||
end
|
end
|
||||||
|
@ -336,14 +269,15 @@ module Treat::Entities::Entity::Buildable
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# This should be improved on.
|
|
||||||
def check_encoding(string)
|
def check_encoding(string)
|
||||||
string.encode("UTF-8", undef: :replace) # Fix
|
string.encode("UTF-8", undef: :replace) # Fix
|
||||||
end
|
end
|
||||||
|
|
||||||
# Build a phrase from a string.
|
# Build a phrase from a string.
|
||||||
def group_from_string(string)
|
def sentence_or_phrase_from_string(string)
|
||||||
|
|
||||||
check_encoding(string)
|
check_encoding(string)
|
||||||
|
|
||||||
if !(string =~ /[a-zA-Z]+/)
|
if !(string =~ /[a-zA-Z]+/)
|
||||||
Treat::Entities::Fragment.new(string)
|
Treat::Entities::Fragment.new(string)
|
||||||
elsif string.count('.!?') >= 1
|
elsif string.count('.!?') >= 1
|
||||||
|
@ -351,6 +285,7 @@ module Treat::Entities::Entity::Buildable
|
||||||
else
|
else
|
||||||
Treat::Entities::Phrase.new(string)
|
Treat::Entities::Phrase.new(string)
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
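The branching above boils down to three cases for strings with a single sentence ender and no newline; multi-sentence strings fall through to zone_from_string, which is not shown here. Roughly:

    Treat::Entities::Entity.build('$$ 42 %%')           # no letters at all     => Fragment
    Treat::Entities::Entity.build('a short phrase')     # letters, no . ! ?     => Phrase
    Treat::Entities::Entity.build('A full sentence.')   # letters plus an ender => Sentence
    Treat::Entities::Entity.build("One line.\nTwo.")    # several enders or a newline => handled by zone_from_string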
||||||
# Build the right type of token
|
# Build the right type of token
|
||||||
|
@ -396,7 +331,7 @@ module Treat::Entities::Entity::Buildable
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def create_collection(fv)
|
def create_collection(fv)
|
||||||
FileUtils.mkdir(fv)
|
FileUtils.mkdir(fv)
|
||||||
Treat::Entities::Collection.new(fv)
|
Treat::Entities::Collection.new(fv)
|
|
@ -1,7 +1,7 @@
|
||||||
# This module implements methods that are used
|
# This module implements methods that are used
|
||||||
# by workers to determine if an entity is properly
|
# by workers to determine if an entity is properly
|
||||||
# formatted before working on it.
|
# formatted before working on it.
|
||||||
module Treat::Entities::Entity::Checkable
|
module Treat::Entities::Abilities::Checkable
|
||||||
|
|
||||||
# Check if the entity has the given feature,
|
# Check if the entity has the given feature,
|
||||||
# and if so return it. If not, calculate the
|
# and if so return it. If not, calculate the
|
||||||
|
@ -15,7 +15,7 @@ module Treat::Entities::Entity::Checkable
|
||||||
g2 = Treat::Workers.lookup(feature)
|
g2 = Treat::Workers.lookup(feature)
|
||||||
|
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"#{g1.type.to_s.capitalize} " +
|
"#{g1.type.to_s.capitalize} #{task} " +
|
||||||
"requires #{g2.type} #{g2.method}."
|
"requires #{g2.type} #{g2.method}."
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
# Allow comparison of entity hierarchy in DOM.
|
module Treat::Entities::Abilities::Comparable
|
||||||
module Treat::Entities::Entity::Comparable
|
|
||||||
|
|
||||||
# Determines whether the receiving class
|
|
||||||
# is smaller, equal or greater in the DOM
|
|
||||||
# hierarchy compared to the supplied one.
|
|
||||||
def compare_with(klass)
|
def compare_with(klass)
|
||||||
|
|
||||||
i = 0; rank_a = nil; rank_b = nil
|
i = 0; rank_a = nil; rank_b = nil
|
||||||
|
|
||||||
Treat.core.entities.order.each do |type|
|
Treat.core.entities.order.each do |type|
|
||||||
klass2 = Treat::Entities.const_get(type.cc)
|
klass2 = Treat::Entities.const_get(cc(type))
|
||||||
rank_a = i if self <= klass2
|
rank_a = i if self <= klass2
|
||||||
rank_b = i if klass <= klass2
|
rank_b = i if klass <= klass2
|
||||||
next if rank_a && rank_b
|
next if rank_a && rank_b
|
||||||
i += 1
|
i += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
return -1 if rank_a < rank_b
|
return -1 if rank_a < rank_b
|
||||||
return 0 if rank_a == rank_b
|
return 0 if rank_a == rank_b
|
||||||
return 1 if rank_a > rank_b
|
return 1 if rank_a > rank_b
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
|
@ -0,0 +1,47 @@
|
||||||
|
module Treat::Entities::Abilities::Copyable
|
||||||
|
|
||||||
|
require 'fileutils'
|
||||||
|
|
||||||
|
# What happens when it is a database-stored
|
||||||
|
# collection or document ?
|
||||||
|
def copy_into(collection)
|
||||||
|
unless collection.is_a?(
|
||||||
|
Treat::Entities::Collection)
|
||||||
|
raise Treat::Exception,
|
||||||
|
"Cannot copy an entity into " +
|
||||||
|
"something else than a collection."
|
||||||
|
end
|
||||||
|
if type == :document
|
||||||
|
copy_document_into(collection)
|
||||||
|
elsif type == :collection
|
||||||
|
copy_collection_into(collection)
|
||||||
|
else
|
||||||
|
raise Treat::Exception,
|
||||||
|
"Can only copy a document " +
|
||||||
|
"or collection into a collection."
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def copy_collection_into(collection)
|
||||||
|
copy = dup
|
||||||
|
f = File.dirname(folder)
|
||||||
|
f = f.split(File::SEPARATOR)[-1]
|
||||||
|
f = File.join(collection.folder, f)
|
||||||
|
FileUtils.mkdir(f) unless
|
||||||
|
FileTest.directory(f)
|
||||||
|
FileUtils.cp_r(folder, f)
|
||||||
|
copy.set :folder, f
|
||||||
|
copy
|
||||||
|
end
|
||||||
|
|
||||||
|
def copy_document_into(collection)
|
||||||
|
copy = dup
|
||||||
|
return copy unless file
|
||||||
|
f = File.basename(file)
|
||||||
|
f = File.join(collection.folder, f)
|
||||||
|
FileUtils.cp(file, f)
|
||||||
|
copy.set :file, f
|
||||||
|
copy
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
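On the mongo-data-sets side, copy_into duplicates the backing file or folder into the target collection's folder and returns the copy. A small sketch (paths are illustrative):

    corpus = Treat::Entities::Collection.build('corpus')   # creates corpus/ if needed
    doc    = Treat::Entities::Document.build('notes.txt')

    copy = doc.copy_into(corpus)
    copy.file   # => "corpus/notes.txt" (the original file is left in place)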
@ -1,4 +1,4 @@
|
||||||
module Treat::Entities::Entity::Countable
|
module Treat::Entities::Abilities::Countable
|
||||||
|
|
||||||
# Find the position of the current entity
|
# Find the position of the current entity
|
||||||
# inside the parent entity, starting at 1.
|
# inside the parent entity, starting at 1.
|
||||||
|
@ -41,7 +41,6 @@ module Treat::Entities::Entity::Countable
|
||||||
# Returns the frequency of the given value
|
# Returns the frequency of the given value
|
||||||
# in this entity.
|
# in this entity.
|
||||||
def frequency_of(value)
|
def frequency_of(value)
|
||||||
value = value.downcase
|
|
||||||
if is_a?(Treat::Entities::Token)
|
if is_a?(Treat::Entities::Token)
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"Cannot get the frequency " +
|
"Cannot get the frequency " +
|
|
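frequency_of leans on the token registry kept by the root entity, so the receiver has to contain tokens already. A sketch, assuming the usual Treat task names:

    s = Treat::Entities::Entity.build('The cat saw the cat.')
    s.apply(:tokenize)        # #do on the mongo-data-sets branch
    s.frequency_of('cat')     # => 2 (master downcases the value before the lookup)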
@ -0,0 +1,83 @@
|
||||||
|
# When Treat.debug is set to true, each call to
|
||||||
|
# #call_worker will result in a debug message being
|
||||||
|
# printed by the #print_debug function.
|
||||||
|
module Treat::Entities::Abilities::Debuggable
|
||||||
|
|
||||||
|
@@prev = nil
|
||||||
|
@@i = 0
|
||||||
|
|
||||||
|
# Explains what Treat is currently doing.
|
||||||
|
def print_debug(entity, task, worker, group, options)
|
||||||
|
|
||||||
|
targs = group.targets.map do |target|
|
||||||
|
target.to_s
|
||||||
|
end
|
||||||
|
|
||||||
|
if targs.size == 1
|
||||||
|
t = targs[0]
|
||||||
|
else
|
||||||
|
t = targs[0..-2].join(', ') +
|
||||||
|
' and/or ' + targs[-1]
|
||||||
|
end
|
||||||
|
|
||||||
|
genitive = targs.size > 1 ?
|
||||||
|
'their' : 'its'
|
||||||
|
|
||||||
|
doing = ''
|
||||||
|
|
||||||
|
human_task = task.to_s.gsub('_', ' ')
|
||||||
|
|
||||||
|
if group.type == :transformer ||
|
||||||
|
group.type == :computer
|
||||||
|
|
||||||
|
tt = human_task
|
||||||
|
tt = tt[0..-2] if tt[-1] == 'e'
|
||||||
|
ed = tt[-1] == 'd' ? '' : 'ed'
|
||||||
|
doing = "#{tt.capitalize}#{ed} #{t}"
|
||||||
|
|
||||||
|
elsif group.type == :annotator
|
||||||
|
|
||||||
|
if group.preset_option
|
||||||
|
opt = options[group.preset_option]
|
||||||
|
form = opt.to_s.gsub('_', ' ')
|
||||||
|
human_task[-1] = ''
|
||||||
|
human_task = form + ' ' + human_task
|
||||||
|
end
|
||||||
|
|
||||||
|
doing = "Annotated #{t} with " +
|
||||||
|
"#{genitive} #{human_task}"
|
||||||
|
end
|
||||||
|
|
||||||
|
if group.to_s.index('Formatters')
|
||||||
|
curr = doing +
|
||||||
|
' in format ' +
|
||||||
|
worker.to_s
|
||||||
|
else
|
||||||
|
curr = doing +
|
||||||
|
' using ' +
|
||||||
|
worker.to_s.gsub('_', ' ')
|
||||||
|
end
|
||||||
|
|
||||||
|
curr.gsub!('ss', 's') unless curr.index('class')
|
||||||
|
curr += '.'
|
||||||
|
|
||||||
|
if curr == @@prev
|
||||||
|
@@i += 1
|
||||||
|
else
|
||||||
|
if @@i > 1
|
||||||
|
Treat.core.entities.list.each do |e|
|
||||||
|
@@prev.gsub!(e.to_s, e.to_s + 's')
|
||||||
|
end
|
||||||
|
@@prev.gsub!('its', 'their')
|
||||||
|
@@prev = @@prev.split(' ').
|
||||||
|
insert(1, @@i.to_s).join(' ')
|
||||||
|
end
|
||||||
|
@@i = 0
|
||||||
|
puts @@prev # Last call doesn't get shown.
|
||||||
|
end
|
||||||
|
|
||||||
|
@@prev = curr
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
@ -1,7 +1,7 @@
|
||||||
# Makes a class delegatable, allowing calls
|
# Makes a class delegatable, allowing calls
|
||||||
# on it to be forwarded to a worker class
|
# on it to be forwarded to a worker class
|
||||||
# able to perform the appropriate task.
|
# able to perform the appropriate task.
|
||||||
module Treat::Entities::Entity::Delegatable
|
module Treat::Entities::Abilities::Delegatable
|
||||||
|
|
||||||
# Add preset methods to an entity class.
|
# Add preset methods to an entity class.
|
||||||
def add_presets(group)
|
def add_presets(group)
|
||||||
|
@ -10,25 +10,27 @@ module Treat::Entities::Entity::Delegatable
|
||||||
return unless opt
|
return unless opt
|
||||||
|
|
||||||
self.class_eval do
|
self.class_eval do
|
||||||
group.presets.each do |preset|
|
group.presets.each do |preset|
|
||||||
define_method(preset) do |worker=nil, options={}|
|
define_method(preset) do |worker=nil, options={}|
|
||||||
return get(preset) if has?(preset)
|
return get(preset) if has?(preset)
|
||||||
options = {opt => preset}.merge(options)
|
options = {opt => preset}.merge(options)
|
||||||
m = group.method
|
m = group.method
|
||||||
send(m, worker, options)
|
send(m, worker, options)
|
||||||
f = unset(m)
|
f = unset(m)
|
||||||
features[preset] = f if f
|
features[preset] = f if f
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Add the workers to perform a task on an entity class.
|
# Add the workers to perform a task on an entity class.
|
||||||
def add_workers(group)
|
def add_workers(group)
|
||||||
self.class_eval do
|
self.class_eval do
|
||||||
|
|
||||||
task = group.method
|
task = group.method
|
||||||
add_presets(group)
|
add_presets(group)
|
||||||
|
|
||||||
define_method(task) do |worker=nil, options={}|
|
define_method(task) do |worker=nil, options={}|
|
||||||
if worker.is_a?(Hash)
|
if worker.is_a?(Hash)
|
||||||
options, worker =
|
options, worker =
|
||||||
|
@ -62,7 +64,7 @@ module Treat::Entities::Entity::Delegatable
|
||||||
worker_not_found(worker, group)
|
worker_not_found(worker, group)
|
||||||
end
|
end
|
||||||
|
|
||||||
worker = group.const_get(worker.to_s.cc.intern)
|
worker = group.const_get(cc(worker.to_s).intern)
|
||||||
result = worker.send(group.method, entity, options)
|
result = worker.send(group.method, entity, options)
|
||||||
|
|
||||||
if group.type == :annotator && result
|
if group.type == :annotator && result
|
||||||
|
@ -88,32 +90,40 @@ module Treat::Entities::Entity::Delegatable
|
||||||
# Get the default worker for that language
|
# Get the default worker for that language
|
||||||
# inside the given group.
|
# inside the given group.
|
||||||
def find_worker_for_language(language, group)
|
def find_worker_for_language(language, group)
|
||||||
|
|
||||||
lang = Treat.languages[language]
|
lang = Treat.languages[language]
|
||||||
cat = group.to_s.split('::')[2].downcase.intern
|
cat = group.to_s.split('::')[2].downcase.intern
|
||||||
group = group.mn.ucc.intern
|
group = ucc(cl(group)).intern
|
||||||
|
|
||||||
if lang.nil?
|
if lang.nil?
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"No configuration file loaded for language #{language}."
|
"No configuration file loaded for language #{language}."
|
||||||
end
|
end
|
||||||
|
|
||||||
workers = lang.workers
|
workers = lang.workers
|
||||||
|
|
||||||
if !workers.respond_to?(cat) ||
|
if !workers.respond_to?(cat) ||
|
||||||
!workers[cat].respond_to?(group)
|
!workers[cat].respond_to?(group)
|
||||||
workers = Treat.languages.agnostic.workers
|
workers = Treat.languages.agnostic.workers
|
||||||
end
|
end
|
||||||
|
|
||||||
if !workers.respond_to?(cat) ||
|
if !workers.respond_to?(cat) ||
|
||||||
!workers[cat].respond_to?(group)
|
!workers[cat].respond_to?(group)
|
||||||
raise Treat::Exception,
|
raise Treat::Exception,
|
||||||
"No #{group} is/are available for the " +
|
"No #{group} is/are available for the " +
|
||||||
"#{language.to_s.capitalize} language."
|
"#{language.to_s.capitalize} language."
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
workers[cat][group].first
|
workers[cat][group].first
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return an error message and suggest possible typos.
|
# Return an error message and suggest possible typos.
|
||||||
def worker_not_found(worker, group)
|
def worker_not_found(klass, group)
|
||||||
"Worker with name '#{worker}' couldn't be "+
|
"Algorithm '#{ucc(cl(klass))}' couldn't be "+
|
||||||
"found in group #{group}." + Treat::Helpers::Help.
|
"found in group #{group}." + did_you_mean?(
|
||||||
did_you_mean?(group.list.map { |c| c.ucc }, worker)
|
group.list.map { |c| ucc(c) }, ucc(klass))
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
|
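add_workers is what turns each worker group into an instance method on the matching entity classes: the generated method takes an optional worker name plus options and otherwise falls back to the per-language default chosen by find_worker_for_language. Illustrative calls (the task and worker names are examples, not an exhaustive list):

    sentence = Treat::Entities::Entity.build('A sample sentence to tokenize.')
    sentence.tokenize                        # default worker for the entity's language
    # sentence.tokenize(:ptb)                # or: name a specific worker in the group
    # sentence.tokenize(:ptb, option: true)  # options are forwarded to the worker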
@ -1,8 +1,8 @@
|
||||||
# Implement support for the functions #do and #do_task.
|
# Implement support for the functions #do and #do_task.
|
||||||
module Treat::Entities::Entity::Applicable
|
module Treat::Entities::Abilities::Doable
|
||||||
|
|
||||||
# Perform the supplied tasks on the entity.
|
# Perform the supplied tasks on the entity.
|
||||||
def apply(*tasks)
|
def do(*tasks)
|
||||||
tasks.each do |task|
|
tasks.each do |task|
|
||||||
|
|
||||||
if task.is_a?(Hash)
|
if task.is_a?(Hash)
|
||||||
|
@ -25,8 +25,6 @@ module Treat::Entities::Entity::Applicable
|
||||||
end
|
end
|
||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
alias :do :apply
|
|
||||||
|
|
||||||
# Perform an individual task on an entity
|
# Perform an individual task on an entity
|
||||||
# given a worker and options to pass to it.
|
# given a worker and options to pass to it.
|
||||||
|
@ -35,7 +33,7 @@ module Treat::Entities::Entity::Applicable
|
||||||
entity_types = group.targets
|
entity_types = group.targets
|
||||||
f = nil
|
f = nil
|
||||||
entity_types.each do |t|
|
entity_types.each do |t|
|
||||||
f = true if is_a?(Treat::Entities.const_get(t.cc))
|
f = true if is_a?(Treat::Entities.const_get(cc(t)))
|
||||||
end
|
end
|
||||||
if f || entity_types.include?(:entity)
|
if f || entity_types.include?(:entity)
|
||||||
send(task, worker, options)
|
send(task, worker, options)
|
|
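Since #do (renamed #apply on master, with #do kept as an alias) just loops over the supplied tasks and dispatches each one through do_task, the usual processing chain reads like this sketch (the file name is hypothetical):

    doc = Treat::Entities::Document.build('article.txt')
    doc.apply(:chunk, :segment, :tokenize)    # master
    # doc.do(:chunk, :segment, :tokenize)     # mongo-data-sets, and still valid on master via the alias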
@ -1,7 +1,7 @@
|
||||||
module Treat::Entities::Entity::Exportable
|
module Treat::Entities::Abilities::Exportable
|
||||||
|
|
||||||
def export(problem)
|
def export(problem)
|
||||||
ds = Treat::Learning::DataSet.new(problem)
|
ds = Treat::Core::DataSet.new(problem)
|
||||||
each_entity(problem.question.target) do |e|
|
each_entity(problem.question.target) do |e|
|
||||||
ds << e
|
ds << e
|
||||||
end
|
end
|
|
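Combined with the Problem sketched earlier, #export walks every entity of the question's target type and appends it to a data set. Roughly:

    # problem: see the Problem sketch above; collection: any collection of documents
    data_set = collection.export(problem)
    # => Treat::Learning::DataSet on master, Treat::Core::DataSet on mongo-data-sets,
    #    with one row per entity of type problem.question.target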
@ -1,4 +1,4 @@
|
||||||
module Treat::Entities::Entity::Iterable
|
module Treat::Entities::Abilities::Iterable
|
||||||
|
|
||||||
# Yields each entity of any of the supplied
|
# Yields each entity of any of the supplied
|
||||||
# types in the children tree of this Entity.
|
# types in the children tree of this Entity.
|
||||||
|
@ -6,12 +6,12 @@ module Treat::Entities::Entity::Iterable
|
||||||
# #each. It does not yield the top element being
|
# #each. It does not yield the top element being
|
||||||
# recursed.
|
# recursed.
|
||||||
#
|
#
|
||||||
# This function NEEDS to be ported to C. #FIXME
|
# This function NEEDS to be ported to C.
|
||||||
def each_entity(*types)
|
def each_entity(*types)
|
||||||
types = [:entity] if types.size == 0
|
types = [:entity] if types.size == 0
|
||||||
f = false
|
f = false
|
||||||
types.each do |t2|
|
types.each do |t2|
|
||||||
if is_a?(Treat::Entities.const_get(t2.cc))
|
if is_a?(Treat::Entities.const_get(cc(t2)))
|
||||||
f = true; break
|
f = true; break
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -57,7 +57,7 @@ module Treat::Entities::Entity::Iterable
|
||||||
def ancestor_with_type(type)
|
def ancestor_with_type(type)
|
||||||
return unless has_parent?
|
return unless has_parent?
|
||||||
ancestor = @parent
|
ancestor = @parent
|
||||||
type_klass = Treat::Entities.const_get(type.cc)
|
type_klass = Treat::Entities.const_get(cc(type))
|
||||||
while not ancestor.is_a?(type_klass)
|
while not ancestor.is_a?(type_klass)
|
||||||
return nil unless (ancestor && ancestor.has_parent?)
|
return nil unless (ancestor && ancestor.has_parent?)
|
||||||
ancestor = ancestor.parent
|
ancestor = ancestor.parent
|
||||||
|
@ -94,17 +94,25 @@ module Treat::Entities::Entity::Iterable
|
||||||
end
|
end
|
||||||
|
|
||||||
# Number of children that have a given feature.
|
# Number of children that have a given feature.
|
||||||
# Second variable to allow for passing value to check for.
|
def num_children_with_feature(feature)
|
||||||
def num_children_with_feature(feature, value = nil, recursive = true)
|
|
||||||
i = 0
|
i = 0
|
||||||
m = method(recursive ? :each_entity : :each)
|
each do |c|
|
||||||
m.call do |c|
|
i += 1 if c.has?(feature)
|
||||||
next unless c.has?(feature)
|
|
||||||
i += (value == nil ? 1 :
|
|
||||||
(c.get(feature) == value ? 1 : 0))
|
|
||||||
end
|
end
|
||||||
i
|
i
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Return the first element in the array, warning if not
|
||||||
|
# the only one in the array. Used for magic methods: e.g.,
|
||||||
|
# the magic method "word" if called on a sentence with many
|
||||||
|
# words, Treat will return the first word, but warn the user.
|
||||||
|
def first_but_warn(array, type)
|
||||||
|
if array.size > 1
|
||||||
|
warn "Warning: requested one #{type}, but" +
|
||||||
|
" there are many #{type}s in this entity."
|
||||||
|
end
|
||||||
|
array[0]
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
|
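A few illustrative uses of the iteration helpers above; note the branch difference in num_children_with_feature, where master adds optional value and recursive arguments:

    sentence = Treat::Entities::Entity.build('The cat sat.')
    sentence.apply(:tokenize, :tag)                    # #do on the mongo-data-sets branch
    sentence.each_entity(:word) { |w| puts w.to_s }
    sentence.num_children_with_feature(:tag)           # children carrying a :tag annotation
    sentence.num_children_with_feature(:tag, 'NN')     # master only: children whose tag equals 'NN'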
@ -1,20 +1,27 @@
|
||||||
module Treat::Entities::Entity::Magical
|
module Treat::Entities::Abilities::Magical
|
||||||
|
|
||||||
# Parse "magic methods", which allow the following
|
# Parse "magic methods", which allow the following
|
||||||
# syntaxes to be used (where 'word' can be replaced
|
# syntaxes to be used (where 'word' can be replaced
|
||||||
# by any entity type, e.g. token, zone, etc.):
|
# by any entity type, e.g. token, zone, etc.):
|
||||||
#
|
#
|
||||||
# - each_word : iterate over each child of type word.
|
# - each_word : iterate over each entity of type word.
|
||||||
# - words: return an array of children words.
|
# - words: return an array of words in the entity.
|
||||||
# - word: return the first word in the entity.
|
# - word: return the first word in the entity.
|
||||||
# - word_count: return the number of words in the entity.
|
# - word_count: return the number of words in the entity.
|
||||||
# - words_with_*(value) (where * is an arbitrary feature):
|
# - words_with_*(value) (where * is an arbitrary feature):
|
||||||
# return the words that have the given feature set to value.
|
# return the words that have the given feature.
|
||||||
|
# - word_with_*(value) : return the first word with
|
||||||
|
# the feature specified by * in value.
|
||||||
|
#
|
||||||
|
# Also provides magical methods for types of words:
|
||||||
|
#
|
||||||
|
# - each_noun:
|
||||||
|
# - nouns:
|
||||||
|
# - noun:
|
||||||
|
# - noun_count:
|
||||||
|
# - nouns_with_*(value)
|
||||||
|
# - noun_with_*(value)
|
||||||
#
|
#
|
||||||
# Also provides magical methods for types of words (each_noun,
|
|
||||||
# nouns, noun_count, nouns_with_*(value) noun_with_*(value), etc.)
|
|
||||||
# For this to be used, the words in the text must have been
|
|
||||||
# tokenized and categorized in the first place.
|
|
||||||
def magic(sym, *args)
|
def magic(sym, *args)
|
||||||
|
|
||||||
# Cache this for performance.
|
# Cache this for performance.
|
||||||
|
@ -73,21 +80,9 @@ module Treat::Entities::Entity::Magical
|
||||||
elsif method =~ /^frequency_in_#{@@entities_regexp}$/
|
elsif method =~ /^frequency_in_#{@@entities_regexp}$/
|
||||||
frequency_in($1.intern)
|
frequency_in($1.intern)
|
||||||
else
|
else
|
||||||
return :no_magic # :-(
|
return :no_magic
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Return the first element in the array, warning if not
|
|
||||||
# the only one in the array. Used for magic methods: e.g.,
|
|
||||||
# the magic method "word" if called on a sentence with many
|
|
||||||
# words, Treat will return the first word, but warn the user.
|
|
||||||
def first_but_warn(array, type)
|
|
||||||
if array.size > 1
|
|
||||||
warn "Warning: requested one #{type}, but" +
|
|
||||||
" there are many #{type}s in this entity."
|
|
||||||
end
|
|
||||||
array[0]
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
|
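Concretely, the magic dispatch gives every entity type a family of generated accessors. A few examples on a tokenized and tagged sentence (the 'NN' tag value is illustrative, and a tagger is assumed to be available for the language):

    sentence = Treat::Entities::Entity.build('The quick brown fox jumps.')
    sentence.apply(:tokenize, :tag)   # #do on the mongo-data-sets branch
    sentence.words                    # all word children
    sentence.word                     # the first word (warns if there are several)
    sentence.word_count               # number of words
    sentence.each_word { |w| puts w }
    sentence.words_with_tag('NN')     # words whose :tag feature equals 'NN'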
@ -0,0 +1,46 @@
|
||||||
|
# Registers occurrences of textual values inside
|
||||||
|
# all child entities. Useful for calculating frequencies.
|
||||||
|
module Treat::Entities::Abilities::Registrable
|
||||||
|
|
||||||
|
# Registers a token in the @registry hash.
|
||||||
|
def register(entity)
|
||||||
|
|
||||||
|
unless @registry
|
||||||
|
@count = 0
|
||||||
|
@registry = {
|
||||||
|
:value => {},
|
||||||
|
:position => {},
|
||||||
|
:type => {},
|
||||||
|
:id => {}
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
if entity.is_a?(Treat::Entities::Token) ||
|
||||||
|
entity.is_a?(Treat::Entities::Phrase)
|
||||||
|
val = entity.to_s.downcase
|
||||||
|
@registry[:value][val] ||= 0
|
||||||
|
@registry[:value][val] += 1
|
||||||
|
end
|
||||||
|
|
||||||
|
@registry[:id][entity.id] = true
|
||||||
|
@registry[:type][entity.type] ||= 0
|
||||||
|
@registry[:type][entity.type] += 1
|
||||||
|
@registry[:position][entity.id] = @count
|
||||||
|
@count += 1
|
||||||
|
|
||||||
|
@parent.register(entity) if has_parent?
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
# Backtrack up the tree to find a token registry,
|
||||||
|
# by default the one in the root node of any entity.
|
||||||
|
def registry(type = nil)
|
||||||
|
if has_parent? &&
|
||||||
|
type != self.type
|
||||||
|
@parent.registry(type)
|
||||||
|
else
|
||||||
|
@registry
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
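The registry built above is what the counting helpers read from: after tokenization, the root node holds per-value counts keyed by the downcased token string, alongside per-type counts and positions. A sketch:

    s = Treat::Entities::Entity.build('The cat saw the cat.')
    s.apply(:tokenize)           # #do on this branch
    s.registry[:value]['cat']    # => 2
    s.registry[:type][:word]     # => number of word tokens registered in the subtree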
@ -1,22 +1,18 @@
|
||||||
# Gives entities the ability to be converted
|
# Gives entities the ability to be converted
|
||||||
# to string representations (#to_string, #to_s,
|
# to string representations (#to_string, #to_s,
|
||||||
# #to_str, #inspect, #print_tree).
|
# #to_str, #inspect, #print_tree).
|
||||||
module Treat::Entities::Entity::Stringable
|
module Treat::Entities::Abilities::Stringable
|
||||||
|
|
||||||
# Returns the entity's true string value.
|
# Return the entity's true string value in
|
||||||
def to_string; @value.dup; end
|
# plain text format. Non-terminal entities
|
||||||
|
# will normally have an empty value.
|
||||||
# Returns an array of the childrens' string
|
def to_string; @value; end
|
||||||
# values, found by calling #to_s on them.
|
|
||||||
def to_a; @children.map { |c| c.to_s }; end
|
|
||||||
|
|
||||||
alias :to_ary :to_a
|
|
||||||
|
|
||||||
# Returns the entity's string value by
|
# Returns the entity's string value by
|
||||||
# imploding the value of all terminal
|
# imploding the value of all terminal
|
||||||
# entities in the subtree of that entity.
|
# entities in the subtree of that entity.
|
||||||
def to_s
|
def to_s
|
||||||
has_children? ? implode.strip : @value.dup
|
@value != '' ? @value : implode.strip
|
||||||
end
|
end
|
||||||
|
|
||||||
# #to_str is the same as #to_s.
|
# #to_str is the same as #to_s.
|
||||||
|
@ -28,10 +24,12 @@ module Treat::Entities::Entity::Stringable
|
||||||
def short_value(max_length = 30)
|
def short_value(max_length = 30)
|
||||||
s = to_s
|
s = to_s
|
||||||
words = s.split(' ')
|
words = s.split(' ')
|
||||||
return s if (s.length < max_length) ||
|
if s.length < max_length
|
||||||
!(words[0..2] && words[-2..-1])
|
s
|
||||||
words[0..2].join(' ') + ' [...] ' +
|
else
|
||||||
words[-2..-1].join(' ')
|
words[0..2].join(' ') + ' [...] ' +
|
||||||
|
words[-2..-1].join(' ')
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Print out an ASCII representation of the tree.
|
# Print out an ASCII representation of the tree.
|
||||||
|
@ -40,8 +38,7 @@ module Treat::Entities::Entity::Stringable
|
||||||
# Return an informative string representation
|
# Return an informative string representation
|
||||||
# of the entity.
|
# of the entity.
|
||||||
def inspect
|
def inspect
|
||||||
name = self.class.mn
|
s = "#{cl(self.class)} (#{@id.to_s})"
|
||||||
s = "#{name} (#{@id.to_s})"
|
|
||||||
if caller_method(2) == :inspect
|
if caller_method(2) == :inspect
|
||||||
@id.to_s
|
@id.to_s
|
||||||
else
|
else
|
||||||
|
@ -58,14 +55,16 @@ module Treat::Entities::Entity::Stringable
|
||||||
end
|
end
|
||||||
|
|
||||||
# Helper method to implode the string value of the subtree.
|
# Helper method to implode the string value of the subtree.
|
||||||
def implode(value = "")
|
def implode
|
||||||
|
|
||||||
return @value.dup if !has_children?
|
return @value.dup if !has_children?
|
||||||
|
|
||||||
|
value = ''
|
||||||
|
|
||||||
each do |child|
|
each do |child|
|
||||||
|
|
||||||
if child.is_a?(Treat::Entities::Section)
|
if child.is_a?(Treat::Entities::Section)
|
||||||
value << "\n\n"
|
value += "\n\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
if child.is_a?(Treat::Entities::Token) || child.value != ''
|
if child.is_a?(Treat::Entities::Token) || child.value != ''
|
||||||
|
@ -73,14 +72,14 @@ module Treat::Entities::Entity::Stringable
|
||||||
child.is_a?(Treat::Entities::Enclitic)
|
child.is_a?(Treat::Entities::Enclitic)
|
||||||
value.strip!
|
value.strip!
|
||||||
end
|
end
|
||||||
value << child.to_s + ' '
|
value += child.to_s + ' '
|
||||||
else
|
else
|
||||||
child.implode(value)
|
value += child.implode
|
||||||
end
|
end
|
||||||
|
|
||||||
if child.is_a?(Treat::Entities::Title) ||
|
if child.is_a?(Treat::Entities::Title) ||
|
||||||
child.is_a?(Treat::Entities::Paragraph)
|
child.is_a?(Treat::Entities::Paragraph)
|
||||||
value << "\n\n"
|
value += "\n\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
|
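The net effect of the string helpers above, sketched on a small entity (text illustrative):

    z = Treat::Entities::Entity.build('A first sentence. A second, longer sentence follows it.')
    z.to_s          # the stored value, or the imploded leaf values once the tree has children
    z.short_value   # => "A first sentence. [...] follows it." once the string exceeds 30 characters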
@ -0,0 +1,40 @@
|
||||||
|
module Treat::Entities
|
||||||
|
# Represents a collection of texts.
|
||||||
|
class Collection < Entity
|
||||||
|
|
||||||
|
# Initialize the collection with a folder
|
||||||
|
# containing the texts of the collection.
|
||||||
|
def initialize(folder = nil, id = nil)
|
||||||
|
super('', id)
|
||||||
|
if folder
|
||||||
|
if !FileTest.directory?(folder)
|
||||||
|
FileUtils.mkdir(folder)
|
||||||
|
end
|
||||||
|
set :folder, folder if folder
|
||||||
|
i = folder + '/.index'
|
||||||
|
if FileTest.directory?(i)
|
||||||
|
set :index, i
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Works like the default <<, but if the
|
||||||
|
# entity being added is a collection or a
|
||||||
|
# document, then copy that collection or
|
||||||
|
# document into this collection's folder.
|
||||||
|
def <<(entities, copy = true)
|
||||||
|
unless entities.is_a?(Array)
|
||||||
|
entities = [entities]
|
||||||
|
end
|
||||||
|
entities.each do |entity|
|
||||||
|
if [:document, :collection].
|
||||||
|
include?(entity.type) && copy &&
|
||||||
|
@features[:folder] != nil
|
||||||
|
entity = entity.copy_into(self)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
super(entities)
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
end
|
|
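The copy-on-add behaviour above means a collection physically gathers its members on this branch (on master, the second argument of Entity#<< is clear_parent instead). A sketch with illustrative paths:

    corpus = Treat::Entities::Collection.build('corpus')    # creates corpus/ when missing
    draft  = Treat::Entities::Document.build('draft.txt')

    corpus << draft            # copies draft.txt into corpus/ before attaching it
    # corpus.<<(draft, false)  # or: attach without copying the backing file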
@ -0,0 +1,10 @@
|
||||||
|
module Treat::Entities
|
||||||
|
# Represents a document.
|
||||||
|
class Document < Entity
|
||||||
|
# Initialize a document with a file name.
|
||||||
|
def initialize(file = nil, id = nil)
|
||||||
|
super('', id)
|
||||||
|
set :file, file
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -1,101 +0,0 @@
|
||||||
module Treat::Entities
|
|
||||||
|
|
||||||
# * Collection and document classes * #
|
|
||||||
|
|
||||||
# Represents a collection.
|
|
||||||
class Collection < Entity; end
|
|
||||||
|
|
||||||
# Represents a document.
|
|
||||||
class Document < Entity; end
|
|
||||||
|
|
||||||
# * Sections and related classes * #
|
|
||||||
|
|
||||||
# Represents a section.
|
|
||||||
class Section < Entity; end
|
|
||||||
|
|
||||||
# Represents a page of text.
|
|
||||||
class Page < Section; end
|
|
||||||
|
|
||||||
# Represents a block of text
|
|
||||||
class Block < Section; end
|
|
||||||
|
|
||||||
# Represents a list.
|
|
||||||
class List < Section; end
|
|
||||||
|
|
||||||
# * Zones and related classes * #
|
|
||||||
|
|
||||||
# Represents a zone of text.
|
|
||||||
class Zone < Entity; end
|
|
||||||
|
|
||||||
# Represents a title, subtitle,
|
|
||||||
# logical header of a text.
|
|
||||||
class Title < Zone; end
|
|
||||||
|
|
||||||
# Represents a paragraph (group
|
|
||||||
# of sentences and/or phrases).
|
|
||||||
class Paragraph < Zone; end
|
|
||||||
|
|
||||||
# * Groups and related classes * #
|
|
||||||
|
|
||||||
# Represents a group of tokens.
|
|
||||||
class Group < Entity; end
|
|
||||||
|
|
||||||
# Represents a group of words
|
|
||||||
# with a sentence ender (.!?)
|
|
||||||
class Sentence < Group; end
|
|
||||||
|
|
||||||
# Represents a group of words,
|
|
||||||
# with no sentence ender.
|
|
||||||
class Phrase < Group; end
|
|
||||||
|
|
||||||
# Represents a non-linguistic
|
|
||||||
# fragment (e.g. stray symbols).
|
|
||||||
class Fragment < Group; end
|
|
||||||
|
|
||||||
# * Tokens and related classes* #
|
|
||||||
|
|
||||||
# Represents a terminal element
|
|
||||||
# (leaf) in the text structure.
|
|
||||||
class Token < Entity; end
|
|
||||||
|
|
||||||
# Represents a word. Strictly,
|
|
||||||
# this is /^[[:alpha:]\-']+$/.
|
|
||||||
class Word < Token; end
|
|
||||||
|
|
||||||
# Represents an enclitic.
|
|
||||||
# Strictly, this is any of
|
|
||||||
# 'll 'm 're 's 't or 've.
|
|
||||||
class Enclitic < Token; end
|
|
||||||
|
|
||||||
# Represents a number. Strictly,
|
|
||||||
# this is /^#?([0-9]+)(\.[0-9]+)?$/.
|
|
||||||
class Number < Token
|
|
||||||
def to_i; to_s.to_i; end
|
|
||||||
def to_f; to_s.to_f; end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Represents a punctuation sign.
|
|
||||||
# Strictly, this is /^[[:punct:]\$]+$/.
|
|
||||||
class Punctuation < Token; end
|
|
||||||
|
|
||||||
# Represents a character that is neither
|
|
||||||
# a word, an enclitic, a number or a
|
|
||||||
# punctuation character (e.g. @#$%&*).
|
|
||||||
class Symbol < Token; end
|
|
||||||
|
|
||||||
# Represents a url. This is (imperfectly)
|
|
||||||
# defined as /^(http|https):\/\/[a-z0-9]
|
|
||||||
# +([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}
|
|
||||||
# (([0-9]{1,5})?\/.*)?$/ix
|
|
||||||
class Url < Token; end
|
|
||||||
|
|
||||||
# Represents a valid RFC822 address.
|
|
||||||
# This is (imperfectly) defined as
|
|
||||||
# /.+\@.+\..+/ (fixme maybe?)
|
|
||||||
class Email < Token; end
|
|
||||||
|
|
||||||
# Represents a token whose type
|
|
||||||
# cannot be identified.
|
|
||||||
class Unknown; end
|
|
||||||
|
|
||||||
end
|
|
|
@ -1,106 +1,94 @@
|
||||||
module Treat::Entities
|
module Treat::Entities
|
||||||
|
|
||||||
# Basic tree structure.
|
module Abilities; end
|
||||||
|
|
||||||
|
# Require abilities.
|
||||||
|
p = Treat.paths.lib +
|
||||||
|
'treat/entities/abilities/*.rb'
|
||||||
|
Dir.glob(p).each { |f| require f }
|
||||||
|
|
||||||
require 'birch'
|
require 'birch'
|
||||||
|
|
||||||
# The Entity class extends a basic tree structure
|
|
||||||
# (written in C for optimal speed) and represents
|
|
||||||
# any form of textual entity in a processing task
|
|
||||||
# (this could be a collection of documents, a
|
|
||||||
# single document, a single paragraph, etc.)
|
|
||||||
#
|
|
||||||
# Classes that extend Entity provide the concrete
|
|
||||||
# behavior corresponding to the relevant entity type.
|
|
||||||
-  # See entities.rb for a full list and description of
-  # the different entity types in the document model.
   class Entity < ::Birch::Tree

-    # A symbol representing the lowercase
-    # version of the class name. This is
-    # the only attribute that the Entity
-    # class adds to the Birch::Tree class.
+    # A Symbol representing the lowercase
+    # version of the class name.
     attr_accessor :type

-    # Autoload all the classes in /abilities.
-    path = File.expand_path(__FILE__)
-    patt = File.dirname(path) + '/entity/*.rb'
-    Dir.glob(patt).each { |f| require f }

-    # Implements support for #register, #registry.
-    include Registrable

-    # Implement support for #self.call_worker, etc.
-    extend Delegatable
+    # Implements support for #register,
+    # #registry, and #contains_* methods.
+    include Abilities::Registrable

-    # Implement support for #self.print_debug, etc.
-    extend Debuggable
+    # Implement support for #self.add_workers
+    extend Abilities::Delegatable

-    # Implement support for #self.build and #self.from_*
-    extend Buildable
+    # Implement support for #self.print_debug and
+    # #self.invalid_call_msg
+    extend Abilities::Debuggable

-    # Implement support for #apply (previously #do).
-    include Applicable
+    # Implement support for #self.build
+    # and #self.from_*
+    extend Abilities::Buildable

-    # Implement support for #frequency, #frequency_in,
-    # #frequency_of, #position, #position_from_end, etc.
-    include Countable
+    # Implement support for #do.
+    include Abilities::Doable

-    # Implement support for over 100 #magic methods!
-    include Magical
+    # Implement support for #frequency,
+    # #frequency_in_parent and #position_in_parent.
+    include Abilities::Countable

+    # Implement support for #magic.
+    include Abilities::Magical

     # Implement support for #to_s, #inspect, etc.
-    include Stringable
+    include Abilities::Stringable

-    # Implement support for #check_has and others.
-    include Checkable
+    # Implement support for #check_has
+    # and #check_hasnt_children?
+    include Abilities::Checkable

     # Implement support for #each_entity, as well as
     # #entities_with_type, #ancestors_with_type,
-    # #entities_with_feature, #entities_with_category, etc.
-    include Iterable
+    # #entities_with_feature, #entities_with_category.
+    include Abilities::Iterable

-    # Implement support for #export, allowing to export
-    # a data set row from the receiving entity.
-    include Exportable
+    # Implement support for #export to export
+    # a line of a data set based on a classification.
+    include Abilities::Exportable

+    # Implement support for #copy_into.
+    include Abilities::Copyable

     # Implement support for #self.compare_with
-    extend Comparable
+    extend Abilities::Comparable

     # Initialize the entity with its value and
     # (optionally) a unique identifier. By default,
     # the object_id will be used as id.
     def initialize(value = '', id = nil)
-      id ||= object_id; super(value, id)
+      id ||= object_id
+      super(value, id)
       @type = :entity if self == Entity
-      @type ||= self.class.mn.ucc.intern
+      @type ||= ucc(cl(self.class)).intern
     end

     # Add an entity to the current entity.
     # Registers the entity in the root node
     # token registry if the entity is a leaf.
-    # Unsets the parent node's value; in order
-    # to keep the tree clean, only the leaf
-    # values are stored.
-    #
-    # Takes in a single entity or an array of
-    # entities. Returns the first child supplied.
-    # If a string is
+    #
+    # @see Treat::Registrable
     def <<(entities, clear_parent = true)
-      entities = (entities.is_a?(::String) ||
-      entities.is_a?(::Numeric)) ?
-      entities.to_entity : entities
-      entities = entities.is_a?(::Array) ?
-      entities : [entities]
-      # Register each entity in this node.
-      entities.each { |e| register(e) }
-      # Pass to the <<() method in Birch.
+      unless entities.is_a? Array
+        entities = [entities]
+      end
+      entities.each do |entity|
+        register(entity)
+      end
       super(entities)
-      # Unset the parent value if necessary.
       @parent.value = '' if has_parent?
-      # Return the first child.
-      return entities[0]
+      entities[0]
     end

     # Catch missing methods to support method-like
     # access to features (e.g. entity.category
     # instead of entity.features[:category]) and to
@@ -114,26 +102,29 @@ module Treat::Entities
     # sugar for the #self.build method.
     def method_missing(sym, *args, &block)
       return self.build(*args) if sym == nil
-      return @features[sym] if @features.has_key?(sym)
-      result = magic(sym, *args, &block)
-      return result unless result == :no_magic
-      begin; super(sym, *args, &block)
-      rescue NoMethodError; invalid_call(sym); end
+      if !@features.has_key?(sym)
+        r = magic(sym, *args, &block)
+        return r unless r == :no_magic
+        begin
+          super(sym, *args, &block)
+        rescue NoMethodError
+          raise Treat::Exception,
+          if Treat::Workers.lookup(sym)
+            msg = "Method #{sym} cannot " +
+            "be called on a #{type}."
+          else
+            msg = "Method #{sym} does not exist."
+            msg += did_you_mean?(
+            Treat::Workers.methods, sym)
+          end
+        end
+      else
+        @features[sym]
+      end

     end

-    # Raises a Treat::Exception saying that the
-    # method called was invalid, and that the
-    # requested method does not exist. Also
-    # provides suggestions for misspellings.
-    def invalid_call(sym)
-      msg = Treat::Workers.lookup(sym) ?
-      "Method #{sym} can't be called on a #{type}." :
-      "Method #{sym} is not defined by Treat." +
-      Treat::Helpers::Help.did_you_mean?(
-      Treat::Workers.methods, sym)
-      raise Treat::Exception, msg
-    end

   end

 end
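Both sides of the entity.rb hunk above route unknown method names through method_missing so that stored features can be read as if they were methods (entity.category rather than entity.features[:category]), falling back to the "magic" handler and only then to an error. Below is a minimal standalone sketch of that lookup order; SketchEntity and its feature hash are illustrative names, not the gem's actual Entity class.

# Illustrative sketch only; not the gem's Entity implementation.
class SketchEntity
  attr_reader :features

  def initialize(features = {})
    @features = features
  end

  # Unknown calls are first resolved against the feature hash,
  # mirroring the lookup order shown in the diff above.
  def method_missing(sym, *args, &block)
    return @features[sym] if @features.key?(sym)
    super
  end

  def respond_to_missing?(sym, include_private = false)
    @features.key?(sym) || super
  end
end

e = SketchEntity.new(category: :noun)
e.category   # => :noun, same idea as e.features[:category]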
@@ -1,86 +0,0 @@
-# When Treat.debug is set to true, each call to
-# #call_worker will result in a debug message being
-# printed by the #print_debug function.
-module Treat::Entities::Entity::Debuggable
-
-  # Previous state and counter.
-  @@prev, @@i = nil, 0
-
-  # Explains what Treat is currently doing.
-  # Fixme: last call will never get shown.
-  def print_debug(entity, task, worker, group, options)
-    # Get a list of the worker's targets.
-    targets = group.targets.map(&:to_s)
-
-    # List the worker's targets as either
-    # a single target or an and/or form
-    # (since it would be too costly to
-    # actually determine what target types
-    # were processed at runtime for each call).
-    t = targets.size == 1 ? targets[0] : targets[
-    0..-2].join(', ') + ' and/or ' + targets[-1]
-
-    # Add genitive for annotations (sing./plural)
-    genitive = targets.size > 1 ? 'their' : 'its'
-
-    # Set up an empty string and humanize task name.
-    doing, human_task = '', task.to_s.gsub('_', ' ')
-
-    # Base is "{task}-ed {a(n)|N} {target(s)}"
-    if [:transformer, :computer].include?(group.type)
-      tt = human_task
-      tt = tt[0..-2] if tt[-1] == 'e'
-      ed = tt[-1] == 'd' ? '' : 'ed'
-      doing = "#{tt.capitalize}#{ed} #{t}"
-    # Base is "Annotated {a(n)|N} {target(s)}"
-    elsif group.type == :annotator
-      if group.preset_option
-        opt = options[group.preset_option]
-        form = opt.to_s.gsub('_', ' ')
-        human_task[-1] = ''
-        human_task = form + ' ' + human_task
-      end
-      doing = "Annotated #{t} with " +
-      "#{genitive} #{human_task}"
-    end
-
-    # Form is '{base} in format {worker}'.
-    if group.to_s.index('Formatters')
-      curr = doing + ' in format ' + worker.to_s
-    # Form is '{base} using {worker}'.
-    else
-      curr = doing + ' using ' + worker.to_s.gsub('_', ' ')
-    end
-
-    # Remove any double pluralization that may happen.
-    curr.gsub!('ss', 's') unless curr.index('class')
-
-    # Accumulate repeated tasks.
-    @@i += 1 if curr == @@prev
-
-    # Change tasks, so output.
-    if curr != @@prev && @@prev
-      # Pluralize entity names if necessary.
-      if @@i > 1
-        Treat.core.entities.list.each do |e|
-          @@prev.gsub!(e.to_s, e.to_s + 's')
-        end
-        @@prev.gsub!('its', 'their')
-        @@prev = @@prev.split(' ').
-        insert(1, @@i.to_s).join(' ')
-      # Add determiner if singular.
-      else
-        @@prev = @@prev.split(' ').
-        insert(1, 'a').join(' ')
-      end
-      # Reset counter.
-      @@i = 0
-      # Write to stdout.
-      puts @@prev + '.'
-    end
-
-    @@prev = curr
-
-  end
-
-end
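The deleted Debuggable module above assembles its progress messages from the worker group's target list: a single target is named directly, several targets are joined in an "and/or" form, and the genitive switches between "its" and "their". The following standalone sketch shows that message construction with illustrative values (the real method additionally handles preset options and de-pluralization).

# Illustrative values only; the gem derives these from the worker group.
targets = ['sentence', 'phrase']
task    = :parse
worker  = :stanford

t = targets.size == 1 ? targets[0] :
    targets[0..-2].join(', ') + ' and/or ' + targets[-1]
genitive   = targets.size > 1 ? 'their' : 'its'
human_task = task.to_s.gsub('_', ' ')

puts "Annotated #{t} with #{genitive} #{human_task} " \
     "using #{worker.to_s.gsub('_', ' ')}."
# => Annotated sentence and/or phrase with their parse using stanford.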
@@ -1,36 +0,0 @@
-# Registers the entities ocurring in the subtree of
-# a node as children are added. Also registers text
-# occurrences for word groups and tokens (n grams).
-module Treat::Entities::Entity::Registrable
-
-  # Registers a token or phrase in the registry.
-  # The registry keeps track of children by id,
-  # by entity type, and also keeps the position
-  # of the entity in its parent entity.
-  def register(entity)
-    unless @registry
-      @count, @registry = 0,
-      {id: {}, value: {}, position:{}, type: {}}
-    end
-    if entity.is_a?(Treat::Entities::Token) ||
-    entity.is_a?(Treat::Entities::Group)
-      val = entity.to_s.downcase
-      @registry[:value][val] ||= 0
-      @registry[:value][val] += 1
-    end
-    @registry[:id][entity.id] = true
-    @registry[:type][entity.type] ||= 0
-    @registry[:type][entity.type] += 1
-    @registry[:position][entity.id] = @count
-    @count += 1
-    @parent.register(entity) if has_parent?
-  end
-
-  # Backtrack up the tree to find a token registry,
-  # by default the one in the root node of the tree.
-  def registry(type = nil)
-    (has_parent? && type != self.type) ?
-    @parent.registry(type) : @registry
-  end
-
-end
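The deleted Registrable module above keeps one hash per node with four sub-indexes (:id, :value, :position, :type) and forwards each registration to the parent, so the root node ends up with counts for its whole subtree. A standalone sketch of the per-node bookkeeping, in plain Ruby with no Treat entities involved:

# Plain-Ruby sketch of the registry bookkeeping shown above.
registry = { id: {}, value: {}, position: {}, type: {} }
count    = 0

tokens = [[:word, 'The'], [:word, 'the'], [:punctuation, '.']]
tokens.each_with_index do |(type, value), id|
  val = value.downcase
  registry[:value][val] ||= 0
  registry[:value][val] += 1          # token counts by lowercased value
  registry[:id][id] = true            # known ids
  registry[:type][type] ||= 0
  registry[:type][type] += 1          # counts by entity type
  registry[:position][id] = count     # insertion order
  count += 1
end

registry[:value]  # => {"the"=>2, "."=>1}
registry[:type]   # => {:word=>2, :punctuation=>1}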
@@ -0,0 +1,18 @@
+module Treat::Entities
+
+  # Represents a group of tokens.
+  class Group < Entity; end
+
+  # Represents a group of words
+  # with a sentence ender (.!?)
+  class Sentence < Group; end
+
+  # Represents a group of words,
+  # with no sentence ender.
+  class Phrase < Group; end
+
+  # Represents a non-linguistic
+  # fragment (e.g. stray symbols).
+  class Fragment < Group; end
+
+end
@@ -0,0 +1,13 @@
+module Treat::Entities
+  # Represents a section.
+  class Section < Entity; end
+
+  # Represents a page of text.
+  class Page < Section; end
+
+  # Represents a block of text
+  class Block < Section; end
+
+  # Represents a list.
+  class List < Section; end
+end
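The two new files above only declare empty subclasses; their behaviour comes entirely from Entity, and each class's #type symbol is derived from its class name in #initialize (see the entity.rb hunk earlier, where the branch uses ucc(cl(self.class)).intern). A plain-Ruby sketch of that name-to-symbol derivation, using throwaway classes rather than the gem's:

# Illustrative sketch of how a class name becomes an entity's #type symbol.
class Entity; end
class Group    < Entity; end
class Sentence < Group;  end

def type_symbol(klass)
  name = klass.name.split('::').last               # drop any namespace
  name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase.to_sym
end

type_symbol(Sentence)               # => :sentence
Sentence.ancestors.include?(Group)  # => true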
Some files were not shown because too many files have changed in this diff.