Fix issues #54 and #57

This commit is contained in:
Louis Mullie 2013-06-27 17:58:21 -04:00
parent 49e99ccd0d
commit fff57a4526
7 changed files with 9 additions and 13 deletions

View File

@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
# A helper class to load the OpenNLP package.
class Treat::Loaders::OpenNLP < Treat::Loaders::BindIt
require 'open-nlp'
# Load the OpenNLP bindings by delegating to the parent loader's load.
#
# language - optional value forwarded unchanged to the parent loader
#            (defaults to nil).
def self.load(language = nil)
# Require the 'open-nlp' gem at the moment load is called.
require 'open-nlp'
# Pass the OpenNLP constant, the :open_nlp key and the language up to the
# superclass implementation (presumably Treat::Loaders::BindIt.load; its
# behavior is not visible here).
super(OpenNLP, :open_nlp, language)
end

View File

@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
# A helper class to load the CoreNLP package.
class Treat::Loaders::Stanford < Treat::Loaders::BindIt
require 'stanford-core-nlp'
# Load the Stanford CoreNLP bindings by delegating to the parent loader's load.
#
# language - optional value forwarded unchanged to the parent loader
#            (defaults to nil).
def self.load(language = nil)
# Require the 'stanford-core-nlp' gem at the moment load is called.
require 'stanford-core-nlp'
# Pass the StanfordCoreNLP constant, the :stanford key and the language up
# to the superclass implementation (presumably Treat::Loaders::BindIt.load;
# its behavior is not visible here).
super(StanfordCoreNLP, :stanford, language)
end

View File

@ -1,7 +1,7 @@
module Treat
# The current version of Treat.
VERSION = "2.0.5"
VERSION = '2.0.6'
# Treat requires Ruby >= 1.9.2
if RUBY_VERSION < '1.9.2'

View File

@ -48,7 +48,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
isolated_token = entity.is_a?(Treat::Entities::Token)
@@taggers[lang].apply(t_list).each do |tok|
tokens[i].set(:tag, tok.tag)
tokens[i].set(:tag, tok.tag.split('-').first)
tokens[i].set(:tag_set,
options[:tag_set]) if isolated_token
return tok.tag if isolated_token

View File

@ -4,8 +4,6 @@
# obtained tokens are then grouped into sentences.
class Treat::Workers::Processors::Segmenters::Stanford
Treat::Loaders::Stanford.load
DefaultOptions = {
:also_tokenize => false
}
@ -25,6 +23,8 @@ class Treat::Workers::Processors::Segmenters::Stanford
# add the tokens as children of the sentence.
def self.segment(entity, options = {})
Treat::Loaders::Stanford.load
options = DefaultOptions.merge(options)
entity.check_hasnt_children

View File

@ -1,14 +1,13 @@
# Maximum entropy tokenization supplied by OpenNLP.
class Treat::Workers::Processors::Tokenizers::OpenNlp
require 'open-nlp'
Treat::Loaders::OpenNLP.load
@@tokenizers = {}
# Maximum entropy tokenization.
def self.tokenize(entity, options = {})
Treat::Loaders::OpenNLP.load
lang = entity.language
str = entity.to_s

View File

@ -7,8 +7,6 @@
# single forward- and backward- quotes (`` and '') by default.
class Treat::Workers::Processors::Tokenizers::Stanford
Treat::Loaders::Stanford.load
# Default options for the tokenizer.
DefaultOptions = {
directional_quotes: false,
@ -26,6 +24,7 @@ class Treat::Workers::Processors::Tokenizers::Stanford
# to attempt to get correct directional quotes,
# replacing "..." by ``...''. Off by default.
def self.tokenize(entity, options = {})
Treat::Loaders::Stanford.load
options = DefaultOptions.merge(options)
@@tokenizer ||= StanfordCoreNLP.load(:tokenize)
entity.check_hasnt_children