forked from tleyden/open-ocr
-
Notifications
You must be signed in to change notification settings - Fork 2
/
sandwich_engine_test.go
104 lines (87 loc) · 3.67 KB
/
sandwich_engine_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
package ocrworker
import (
"encoding/json"
"os"
"testing"
"github.com/rs/zerolog/log"
"github.com/couchbaselabs/go.assert"
)
// TestSandwichEngineWithRequest feeds docs/testimage.pdf to a SandwichEngine
// through an OcrRequest assembled in code and requires ProcessRequest to
// return without error. The test is skipped in short mode.
func TestSandwichEngineWithRequest(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	engine := SandwichEngine{}

	bytes, err := os.ReadFile("docs/testimage.pdf")
	assert.True(t, err == nil)

	// Engine arguments are passed as a loosely typed map, as they would be
	// after decoding a JSON request.
	cFlags := make(map[string]interface{})
	cFlags["tessedit_char_whitelist"] = "0123456789"
	cFlags["ocr_type"] = "ocrlayeronly"

	ocrRequest := OcrRequest{
		ImgBytes:   bytes,
		EngineType: EngineSandwichTesseract,
		EngineArgs: cFlags,
		TimeOut:    30,
	}

	workerConfig := workerConfigForTests()

	result, err := engine.ProcessRequest(&ocrRequest, &workerConfig)
	assert.True(t, err == nil)

	// A zerolog event is only written once Msg/Msgf/Send is called; without
	// the terminating call the result would never reach the log.
	log.Info().Str("component", "TEST").Interface("result", result).Msg("sandwich engine result")
}
// TestSandwichEngineWithJson decodes several JSON request bodies into an
// OcrRequest, attaches docs/testimage.pdf as the input, and requires the
// engine selected by NewOcrEngine to process each request without error.
// The test is skipped in short mode.
func TestSandwichEngineWithJson(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	var testJsons []string
	// The request variants in the following block comment are currently
	// disabled; they are kept for reference.
	/*testJsons = append(testJsons, `{"engine":"sandwich"}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{}}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":null}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{"config_vars":{"tessedit_char_whitelist":"0123456789"}, "psm":"1"}}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{"config_vars":{"tessedit_create_hocr":"1", "tessedit_pageseg_mode":"1"}, "psm":"3"}}`)*/
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{"lang":"deu", "ocr_type":"ocrlayeronly","result_optimize":true}}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{"lang":"deu", "ocr_type":"combinedpdf","result_optimize":true}}`)
	testJsons = append(testJsons, `{"engine":"sandwich", "engine_args":{"lang":"deu", "ocr_type":"combinedpdf","result_optimize":false}}`)

	for _, testJson := range testJsons {
		// zerolog events are only written once Msg/Msgf/Send is called.
		log.Info().Str("component", "TEST").Str("testJson", testJson).Msg("processing test request")

		ocrRequest := OcrRequest{TimeOut: 60}
		workerConfig := workerConfigForTests()

		err := json.Unmarshal([]byte(testJson), &ocrRequest)
		assert.True(t, err == nil)

		bytes, err := os.ReadFile("docs/testimage.pdf")
		assert.True(t, err == nil)
		ocrRequest.ImgBytes = bytes

		engine := NewOcrEngine(ocrRequest.EngineType)
		result, err := engine.ProcessRequest(&ocrRequest, &workerConfig)
		if err != nil {
			// Log the failure before the assertion below fails the test, so
			// the cause is visible in the output.
			log.Error().Err(err).Str("component", "TEST").Msg("sandwich engine request failed")
		}
		assert.True(t, err == nil)

		log.Info().Str("component", "TEST").Interface("result", result).Msg("sandwich engine result")
	}
}
// TestNewsandwichEngineArgs decodes a JSON request into an OcrRequest and
// checks that NewSandwichEngineArgs carries the config variables and the
// language over into the resulting engine arguments.
func TestNewsandwichEngineArgs(t *testing.T) {
	const payload = `{"engine":"sandwich", "engine_args":{"config_vars":{"tessedit_char_whitelist":"0123456789"},"ocr_type":"combinedpdf", "psm":"0", "lang":"eng"}}`

	var request OcrRequest
	cfg := workerConfigForTests()

	decodeErr := json.Unmarshal([]byte(payload), &request)
	assert.True(t, decodeErr == nil)

	args, argsErr := NewSandwichEngineArgs(&request, &cfg)
	assert.True(t, argsErr == nil)

	// Exactly one config variable was supplied, and it must come through
	// unchanged.
	assert.Equals(t, len(args.configVars), 1)
	assert.Equals(t, args.configVars["tessedit_char_whitelist"], "0123456789")
	// The page segmentation mode check is currently disabled:
	// assert.Equals(t, args.pageSegMode, "0")
	assert.Equals(t, args.lang, "eng")
}
// TestSandwichEngineWithFile calls processImageFile directly on
// docs/testimage.pdf with hand-built SandwichEngineArgs and requires it to
// return without error. The test is skipped in short mode.
func TestSandwichEngineWithFile(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test in short mode.")
	}

	engine := SandwichEngine{}

	engineArgs := SandwichEngineArgs{}
	engineArgs.ocrType = "combinedpdf"
	engineArgs.ocrOptimize = true
	engineArgs.lang = "deu"
	engineArgs.saveFiles = true

	result, err := engine.processImageFile("docs/testimage.pdf", "PDF", &engineArgs, 20)
	if err != nil {
		// zerolog events are only written once Msg/Msgf/Send is called; log
		// the failure before the assertion below fails the test.
		log.Warn().Err(err).Str("component", "TEST").Msg("processImageFile failed")
	}
	assert.True(t, err == nil)

	log.Info().Str("component", "TEST").Interface("result", result).Msg("sandwich engine result")
}