updated old ocropus commands
This commit is contained in:
parent
20e3aa1bb7
commit
5c984a1dbb
|
@ -0,0 +1,42 @@
|
|||
|
||||
#include <math.h>
|
||||
|
||||
void sigmoid(int n,double v[n],double out[n]) {
|
||||
for(int i=0;i<n;i++) {
|
||||
double x = v[i];
|
||||
x = x<-100?-100:x>100?100:x;
|
||||
out[i] = 1.0/(1.0+exp(-x));
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Accumulate a matrix-vector product into v: v[i] += sum_j a[i][j] * u[j].
 *
 *   n - number of rows of a and length of v
 *   m - number of columns of a and length of u
 *   v - accumulator, updated in place
 *   a - n x m matrix
 *   u - vector of length m
 */
void dotplus(int n,int m,double v[n],double a[n][m],double u[m]) {
    for(int i=0;i<n;i++) {
        /* Must start from zero: an uninitialised accumulator would add an
           indeterminate value to every v[i]. */
        double total = 0.0;
        for(int j=0;j<m;j++) total += a[i][j]*u[j];
        v[i] += total;
    }
}
|
||||
|
||||
/*
 * Accumulate an element-wise product into out: out[i] += u[i] * v[i]
 * for every i in [0, n).
 *
 *   n   - length of u, v and out
 *   u,v - factors (not modified)
 *   out - accumulator, updated in place
 */
void prodplus(int n,double u[n],double v[n],double out[n]) {
    int idx = 0;
    while (idx < n) {
        out[idx] += u[idx]*v[idx];
        idx++;
    }
}
|
||||
|
||||
/*
 * Sum of outer products over r samples, written (not accumulated) into a:
 *   a[i][j] = sum_k u[k][i] * v[k][j]
 *
 *   r - number of samples (rows of u and v)
 *   n - row count of a, column count of u
 *   m - column count of a and of v
 *   a - n x m result; previous contents are overwritten
 *   u - r x n input
 *   v - r x m input
 */
void sumouter(int r,int n,int m,double a[n][m],double u[r][n],double v[r][m]) {
    for (int row = 0; row < n; row++) {
        double *dest = a[row];
        for (int col = 0; col < m; col++) {
            double acc = 0.0;
            int sample = 0;
            /* Samples are summed in ascending order. */
            while (sample < r) {
                acc += u[sample][row]*v[sample][col];
                sample++;
            }
            dest[col] = acc;
        }
    }
}
|
||||
|
||||
/*
 * Column-wise sum of element-wise products over r samples, written (not
 * accumulated) into a:
 *   a[i] = sum_k u[k][i] * v[k][i]
 *
 *   r   - number of samples (rows of u and v)
 *   n   - number of columns of u and v, and length of a
 *   u,v - r x n inputs
 *   a   - result of length n; previous contents are overwritten
 */
void sumprod(int r,int n,double u[r][n],double v[r][n],double a[n]) {
    for (int col = 0; col < n; col++) {
        double acc = 0.0;
        int sample = 0;
        /* Samples are summed in ascending order. */
        while (sample < r) {
            acc += u[sample][col]*v[sample][col];
            sample++;
        }
        a[col] = acc;
    }
}
|
Binary file not shown.
|
@ -7,8 +7,8 @@
|
|||
# statistical natural language modeling, and multi-
|
||||
# lingual capabilities."
|
||||
#
|
||||
# Original paper: Google Ocropus Engine: Breuel,
|
||||
# Thomas M. The Ocropus Open Source OCR System.
|
||||
# Original paper: Google Ocropus Engine: Breuel,
|
||||
# Thomas M. The Ocropus Open Source OCR System.
|
||||
# DFKI and U. Kaiserslautern, Germany.
|
||||
class Treat::Workers::Formatters::Readers::Image
|
||||
|
||||
|
@ -18,26 +18,33 @@ class Treat::Workers::Formatters::Readers::Image
|
|||
#
|
||||
# - (Boolean) :silent => whether to silence Ocropus.
|
||||
def self.read(document, options = {})
  # Required locally so this method does not depend on a top-of-file
  # require that is not visible from here.
  require 'shellwords'

  # NOTE(review): `options` is accepted but never consulted in this method;
  # silencing is decided by Treat.core.verbosity.silence alone.
  read = lambda do |doc|
    self.create_temp_dir do |tmp|
      # Escape the paths so spaces or shell metacharacters in the temp
      # directory or the source file name cannot break (or inject into)
      # the command lines below.
      dir = Shellwords.escape(tmp.to_s)
      src = Shellwords.escape(doc.file.to_s)

      # These commands replace the legacy `ocropus book2pages`,
      # `pages2lines`, `lines2fsts` and `buildhtml` subcommands.
      # The `?` glob patterns are deliberately left unescaped so the
      # shell expands them.
      `ocropus-nlbin -o #{dir}/out #{src}`
      `ocropus-gpageseg #{dir}/out/????.bin.png --minscale 2`
      `ocropus-rpred #{dir}/out/????/??????.bin.png`
      `ocropus-hocr #{dir}/out/????.bin.png -o #{dir}/book.html`

      # Point the document at the generated HTML and parse it while the
      # temporary directory still exists.
      doc.set :file, "#{tmp}/book.html"
      doc.set :format, :html
      doc.read(:html)
    end
  end

  if Treat.core.verbosity.silence
    silence_stdout { read.call(document) }
  else
    read.call(document)
  end

  document
end
|
||||
|
||||
|
||||
# Create a directory that gets deleted after execution of the block.
|
||||
def self.create_temp_dir(&block)
|
||||
if not FileTest.directory?(Treat.paths.tmp)
|
||||
|
@ -50,5 +57,5 @@ class Treat::Workers::Formatters::Readers::Image
|
|||
ensure
|
||||
FileUtils.rm_rf(dname)
|
||||
end
|
||||
|
||||
|
||||
end
|
Loading…
Reference in New Issue