parent
49e99ccd0d
commit
fff57a4526
|
@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
|
|||
# A helper class to load the OpenNLP package.
|
||||
class Treat::Loaders::OpenNLP < Treat::Loaders::BindIt
|
||||
|
||||
require 'open-nlp'
|
||||
|
||||
def self.load(language = nil)
|
||||
require 'open-nlp'
|
||||
super(OpenNLP, :open_nlp, language)
|
||||
end
|
||||
|
||||
|
|
|
@ -3,9 +3,8 @@ require 'treat/loaders/bind_it'
|
|||
# A helper class to load the CoreNLP package.
|
||||
class Treat::Loaders::Stanford < Treat::Loaders::BindIt
|
||||
|
||||
require 'stanford-core-nlp'
|
||||
|
||||
def self.load(language = nil)
|
||||
require 'stanford-core-nlp'
|
||||
super(StanfordCoreNLP, :stanford, language)
|
||||
end
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
module Treat
|
||||
|
||||
# The current version of Treat.
|
||||
VERSION = "2.0.5"
|
||||
VERSION = '2.0.6'
|
||||
|
||||
# Treat requires Ruby >= 1.9.2
|
||||
if RUBY_VERSION < '1.9.2'
|
||||
|
|
|
@ -48,7 +48,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|||
isolated_token = entity.is_a?(Treat::Entities::Token)
|
||||
|
||||
@@taggers[lang].apply(t_list).each do |tok|
|
||||
tokens[i].set(:tag, tok.tag)
|
||||
tokens[i].set(:tag, tok.tag.split('-').first)
|
||||
tokens[i].set(:tag_set,
|
||||
options[:tag_set]) if isolated_token
|
||||
return tok.tag if isolated_token
|
||||
|
|
|
@ -4,8 +4,6 @@
|
|||
# obtained tokens are then grouped into sentences.
|
||||
class Treat::Workers::Processors::Segmenters::Stanford
|
||||
|
||||
Treat::Loaders::Stanford.load
|
||||
|
||||
DefaultOptions = {
|
||||
:also_tokenize => false
|
||||
}
|
||||
|
@ -25,6 +23,8 @@ class Treat::Workers::Processors::Segmenters::Stanford
|
|||
# add the tokens as children of the sentence.
|
||||
def self.segment(entity, options = {})
|
||||
|
||||
Treat::Loaders::Stanford.load
|
||||
|
||||
options = DefaultOptions.merge(options)
|
||||
entity.check_hasnt_children
|
||||
|
||||
|
|
|
@ -1,14 +1,13 @@
|
|||
# Maximum entropy tokenization supplied by OpenNLP.
|
||||
class Treat::Workers::Processors::Tokenizers::OpenNlp
|
||||
|
||||
require 'open-nlp'
|
||||
Treat::Loaders::OpenNLP.load
|
||||
|
||||
@@tokenizers = {}
|
||||
|
||||
# Maximum entropy tokenization.
|
||||
def self.tokenize(entity, options = {})
|
||||
|
||||
Treat::Loaders::OpenNLP.load
|
||||
|
||||
lang = entity.language
|
||||
str = entity.to_s
|
||||
|
||||
|
|
|
@ -7,8 +7,6 @@
|
|||
# single forward- and backward- quotes (`` and '') by default.
|
||||
class Treat::Workers::Processors::Tokenizers::Stanford
|
||||
|
||||
Treat::Loaders::Stanford.load
|
||||
|
||||
# Default options for the tokenizer.
|
||||
DefaultOptions = {
|
||||
directional_quotes: false,
|
||||
|
@ -26,6 +24,7 @@ class Treat::Workers::Processors::Tokenizers::Stanford
|
|||
# to attempt to get correct directional quotes,
|
||||
# replacing "..." by ``...''. Off by default.
|
||||
def self.tokenize(entity, options = {})
|
||||
Treat::Loaders::Stanford.load
|
||||
options = DefaultOptions.merge(options)
|
||||
@@tokenizer ||= StanfordCoreNLP.load(:tokenize)
|
||||
entity.check_hasnt_children
|
||||
|
|
Loading…
Reference in New Issue