Merge pull request #55 from wstrinz/master

Update Ocropus calls to work with v 0.7
This commit is contained in:
Louis Mullie 2013-06-05 13:13:28 -07:00
commit a8448f8103
1 changed file with 15 additions and 15 deletions

View File

@@ -7,8 +7,8 @@
# statistical natural language modeling, and multi-
# lingual capabilities."
#
# Original paper: Google Ocropus Engine: Breuel,
# Thomas M. The Ocropus Open Source OCR System.
# Original paper: Google Ocropus Engine: Breuel,
# Thomas M. The Ocropus Open Source OCR System.
# DFKI and U. Kaiserslautern, Germany.
class Treat::Workers::Formatters::Readers::Image
@@ -18,27 +18,27 @@ class Treat::Workers::Formatters::Readers::Image
#
# - (Boolean) :silent => whether to silence Ocropus.
def self.read(document, options = {})
# Runs the Ocropus command-line tools on the document's file inside a
# temporary directory, points the document at the generated HTML file,
# marks its format as :html and re-reads it through the HTML reader.
# Returns the `document` argument.
#
# `options` is not referenced anywhere in the visible body; whether
# output is silenced is decided by Treat.core.verbosity.silence below.
#
# NOTE(review): this listing looks like a diff with its +/- markers
# stripped, so the two groups of shell calls below are presumably the
# pre-change and post-change versions of the same step rather than code
# that runs back to back. Confirm against the repository.
read = lambda do |doc|
self.create_temp_dir do |tmp|
# Four `ocropus <subcommand>` invocations; the last one redirects its
# output to #{tmp}/output.html, which is then set as the document file.
# NOTE(review): doc.file is interpolated into the shell command
# unquoted, so a path with spaces or shell metacharacters would break
# the command.
`ocropus book2pages #{tmp}/out #{doc.file}`
`ocropus pages2lines #{tmp}/out`
`ocropus lines2fsts #{tmp}/out`
`ocropus buildhtml #{tmp}/out > #{tmp}/output.html`
doc.set :file, "#{tmp}/output.html"
# Separate ocropus-* executables (nlbin, gpageseg, rpred, hocr); the
# last one writes #{tmp}/book.html via -o, which is then set as the
# document file. The `????` globs are expanded by the shell.
`ocropus-nlbin -o #{tmp}/out #{doc.file}`
`ocropus-gpageseg #{tmp}/out/????.bin.png --minscale 2`
`ocropus-rpred #{tmp}/out/????/??????.bin.png`
`ocropus-hocr #{tmp}/out/????.bin.png -o #{tmp}/book.html`
doc.set :file, "#{tmp}/book.html"
doc.set :format, :html
# Rebinding the lambda-local `doc` has no effect outside the lambda;
# presumably doc.read(:html) populates the document in place. The read
# happens inside the temp-dir block, i.e. before the block returns.
doc = doc.read(:html)
end
end
# Run the lambda with stdout silenced when verbosity.silence is truthy,
# otherwise run it directly.
# NOTE(review): the opening line of the ternary appears twice here,
# presumably the old and new sides of a changed line in the diff.
Treat.core.verbosity.silence ? silence_stdout {
Treat.core.verbosity.silence ? silence_stdout {
read.call(document) } : read.call(document)
document
end
# Create a dire that gets deleted after execution of the block.
# Create a dir that gets deleted after execution of the block.
def self.create_temp_dir(&block)
if not FileTest.directory?(Treat.paths.tmp)
FileUtils.mkdir(Treat.paths.tmp)
@@ -50,5 +50,5 @@ class Treat::Workers::Formatters::Readers::Image
ensure
FileUtils.rm_rf(dname)
end
end