fix language detection and tweak tests

This commit is contained in:
Rob Anderton 2014-01-28 11:56:03 +00:00
parent 0ed8a37761
commit 65235fb935
2 changed files with 12 additions and 11 deletions

View File

@@ -1,9 +1,9 @@
# Language detection using a probabilistic algorithm
# that checks for the presence of words with Bloom
# that checks for the presence of words with Bloom
# filters built from dictionaries for each language.
#
# Original paper: Grothoff. 2007. A Quick Introduction to
# Bloom Filters. Department of Computer Sciences, Purdue
# Original paper: Grothoff. 2007. A Quick Introduction to
# Bloom Filters. Department of Computer Sciences, Purdue
# University.
class Treat::Workers::Extractors::Language::WhatLanguage
@@ -35,7 +35,7 @@ class Treat::Workers::Extractors::Language::WhatLanguage
options = DefaultOptions.merge(options)
@@detector ||= ::WhatLanguage.new(:possibilities)
@@detector ||= ::WhatLanguage.new(:all)
possibilities = @@detector.process_text(entity.to_s)
lang = {}

View File

@@ -434,14 +434,15 @@ module Treat::Specs::Entities
it "guesses the language of the entity" do
Treat.core.language.detect = true
a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran? - Pablo Picasso'
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France. - Goethe'
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
a = 'I want to know God\'s thoughts; the rest are details.' # Albert Einstein
b = 'El mundo de hoy no tiene sentido, asi que por que deberia pintar cuadros que lo tuvieran?' # Pablo Picasso
c = 'Un bon Allemand ne peut souffrir les Francais, mais il boit volontiers les vins de France.' # Goethe
d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen.' # Friedrich Nietzsche
a.language.should eql :english
#b.language.should eql :spanish
#c.language.should eql :french
#d.language.should eql :german
b.language.should eql :spanish
c.language.should eql :french
d.language.should eql :german
# Reset default
Treat.core.language.detect = false