Skip to content

Commit

Permalink
leptonica ASAN fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
kuro337 committed Apr 23, 2024
1 parent 500edae commit 9b78a5a
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 72 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ test_images
bulk
tests/bulk_test.cc
CMakebkp.txt
.vscode
20 changes: 13 additions & 7 deletions src/conversion.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ void createPDF(const std::string &input_path,
delete renderer;
}

auto extractTextFromImageFileLeptonica(const std::string &file_path,
const std::string &lang) -> std::string {
auto extractTextFromImageFileLeptonica(const std::string &file_path, const std::string &lang)
-> std::string {
auto *api = new tesseract::TessBaseAPI();
if (api->Init(nullptr, "eng") != 0) {
fprintf(stderr, "Could not initialize tesseract.\n");
Expand All @@ -70,10 +70,12 @@ auto extractTextFromImageFileLeptonica(const std::string &file_path,
// Fully automatic page segmentation, without orientation and script detection (OSD).

api->SetPageSegMode(tesseract::PSM_AUTO);

api->SetImage(image);
std::string outText(api->GetUTF8Text());
outText = api->GetUTF8Text();

// Get the text from the image.
char *rawText = api->GetUTF8Text();
std::string outText(rawText);
delete[] rawText; // Free the memory allocated by GetUTF8Text

api->End();
delete api;
Expand All @@ -90,8 +92,12 @@ auto extractTextLSTM(const std::string &file_path, const std::string &lang) -> s
Pix *image = pixRead(file_path.c_str());

api->SetImage(image);
std::string outText(api->GetUTF8Text());
outText = api->GetUTF8Text();

// Get the text from the image.
char *rawText = api->GetUTF8Text();
std::string outText(rawText);

delete[] rawText; // Free the memory allocated by GetUTF8Text

api->End();
delete api;
Expand Down
61 changes: 0 additions & 61 deletions tests/bulk_test.cc

This file was deleted.

14 changes: 10 additions & 4 deletions tests/ocr_test.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <conversion.h>
#include <curl/curl.h>
#include <filesystem>
#include <fs.h>
#include <gtest/gtest.h>
#include <gtest/internal/gtest-port.h>
Expand Down Expand Up @@ -49,14 +50,19 @@ TEST_F(ImageProcessingTests, EnvironmentTest) {
TEST_F(ImageProcessingTests, ConvertSingleImageToTextFile) {
imgstr::ImgProcessor imageTranslator;

imageTranslator.convertImageToTextFile(fpaths[0], tempDir);
std::string imagePath = fpaths[0];

auto filename = llvm::SmallString<256>(llvm::sys::path::filename(fpaths[0]));
imageTranslator.convertImageToTextFile(imagePath, tempDir);

llvm::SmallString<256> filename(llvm::sys::path::filename(imagePath));

llvm::sys::path::replace_extension(filename, ".txt");

auto fname = tempDir + "/" + filename;
llvm::outs() << "Single Image Test File: " << fpaths[0] << ", LF: " << fname << '\n';
// auto fname = tempDir + "/" + filename;

std::string fname = tempDir + "/" + std::string(filename.c_str());

llvm::outs() << "Single Image Test File: " << imagePath << ", LF: " << fname << '\n';

bool fileExists = llvm::sys::fs::exists(tempDir + "/" + filename);

Expand Down

0 comments on commit 9b78a5a

Please sign in to comment.