fix language detection and tweak tests
This commit is contained in:
parent
0ed8a37761
commit
65235fb935
|
@ -1,9 +1,9 @@
|
|||
# Language detection using a probabilistic algorithm
|
||||
# that checks for the presence of words with Bloom
|
||||
# that checks for the presence of words with Bloom
|
||||
# filters built from dictionaries for each language.
|
||||
#
|
||||
# Original paper: Grothoff. 2007. A Quick Introduction to
|
||||
# Bloom Filters. Department of Computer Sciences, Purdue
|
||||
# Original paper: Grothoff. 2007. A Quick Introduction to
|
||||
# Bloom Filters. Department of Computer Sciences, Purdue
|
||||
# University.
|
||||
class Treat::Workers::Extractors::Language::WhatLanguage
|
||||
|
||||
|
@ -35,7 +35,7 @@ class Treat::Workers::Extractors::Language::WhatLanguage
|
|||
|
||||
options = DefaultOptions.merge(options)
|
||||
|
||||
@@detector ||= ::WhatLanguage.new(:possibilities)
|
||||
@@detector ||= ::WhatLanguage.new(:all)
|
||||
possibilities = @@detector.process_text(entity.to_s)
|
||||
lang = {}
|
||||
|
||||
|
|
|
@ -434,14 +434,15 @@ module Treat::Specs::Entities
|
|||
it "guesses the language of the entity" do
|
||||
|
||||
Treat.core.language.detect = true
|
||||
a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
|
||||
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
|
||||
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
|
||||
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
|
||||
a = 'I want to know God\'s thoughts; the rest are details.' # Albert Einstein
|
||||
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran?' # Pablo Picasso
|
||||
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France.' # Goethe
|
||||
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen.' # Friedrich Nietzsche
|
||||
|
||||
a.language.should eql :english
|
||||
#b.language.should eql :spanish
|
||||
#c.language.should eql :french
|
||||
#d.language.should eql :german
|
||||
b.language.should eql :spanish
|
||||
c.language.should eql :french
|
||||
d.language.should eql :german
|
||||
|
||||
# Reset default
|
||||
Treat.core.language.detect = false
|
||||
|
|
Loading…
Reference in New Issue