Skip to content

Commit

Permalink
fs resiliency
Browse files Browse the repository at this point in the history
  • Loading branch information
kuro337 committed Apr 20, 2024
1 parent c6e6b8c commit 6ea283c
Show file tree
Hide file tree
Showing 12 changed files with 994 additions and 654 deletions.
27 changes: 14 additions & 13 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# clang-format -i <filename> to run in place - and validate Config
BasedOnStyle: LLVM
IndentWidth: 4
ColumnLimit: 120
ColumnLimit: 100
SpaceAfterCStyleCast: true
UseTab: Never
AlignTrailingComments: true
Expand Down Expand Up @@ -40,13 +40,12 @@ SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
AllowShortFunctionsOnASingleLine: None
AllowShortFunctionsOnASingleLine: All # allows (All,Empty)Constructor {} to be on same line and short funcs
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: None
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes

AllowAllParametersOfDeclarationOnNextLine: true
AllowAllArgumentsOnNextLine: true
BraceWrapping:
Expand All @@ -61,22 +60,16 @@ BraceWrapping:
AfterUnion: false
BeforeCatch: false
BeforeElse: false
SplitEmptyFunction: true
SplitEmptyFunction: false
SplitEmptyNamespace: true
SplitEmptyRecord: true
# IndentBraces: true indents if statements to be non aligned within the body

BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: false
BreakConstructorInitializers: AfterColon
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: AfterColon
BreakStringLiterals: true

### Nests Namespaces namespace Foo { namespace Bar {
CompactNamespaces: true

CompactNamespaces: true ### Nests Namespaces namespace Foo { namespace Bar {
Cpp11BracedListStyle: true
# LineEnding: LF # line ending style : \n or \r\n
DerivePointerAlignment: false ## analyze for most common alignment of & and * and override PointerAlignment if diff
FixNamespaceComments: true # adds closing namespace comments such as // namespace a
IncludeBlocks: Merge # sort and merge imports
Expand All @@ -89,6 +82,14 @@ NamespaceIndentation: All
PenaltyBreakAssignment: 16
PenaltyBreakBeforeFirstCallParameter: 0
PenaltyBreakString: 64 # sets extra allowed cols before strings are broken onto newlines
# PenaltyExcessCharacter: 0 # sets penalty for chars once exceeding Col Limit
PenaltyReturnTypeOnItsOwnLine: 0
ReflowComments: true
# Disabled Settings
# BreakAfterReturnType: Automatic removes operator alignment , dont set
# SpacesInLineCommentPrefix: -1 disables spacing before comments but also classes etc.
# PenaltyExcessCharacter: 0 # sets penalty for chars once exceeding Col Limit
# LineEnding: LF # line ending style : \n or \r\n
# BraceWrapping
# - IndentBraces: true indents if statements to be non aligned within the body
# - SplitEmptyFunction: true
#AfterClass: false
33 changes: 16 additions & 17 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,21 @@ project(opencvOCR)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# add_compile_options(-Wno-deprecated-declarations)

# Creates header file with path to images folder
# set(IMAGE_FOLDER_PATH "images")
# configure_file(
# "${CMAKE_CURRENT_SOURCE_DIR}/image_folder_path.h.in"
# "${CMAKE_CURRENT_BINARY_DIR}/image_folder_path.h"
# @ONLY
# )

# ensure LLVM_DIR is set
macro(add_llvm_support)

set(IMAGE_FOLDER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/images")
set(INPUT_OPEN_TEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/images/screenshot.png")

# add_llvm_support(<target>...)
# Links the LLVM component libraries listed below to every target passed as an
# argument. The macro declares no named parameters; all arguments arrive via ARGN.
# NOTE(review): llvm_map_components_to_libnames is not defined in the visible part
# of this file - presumably it comes from LLVM's CMake package; confirm that the
# package is located before this macro is invoked.
macro(add_llvm_support) # ensure LLVM_DIR is set
# Added through add_compile_options rather than per target, so it is not limited
# to the targets listed in ARGN.
add_compile_options(-Wno-deprecated-declarations)
# LLVM components to resolve into concrete library names.
set(llvm_components core support irreader)
llvm_map_components_to_libnames(llvm_libs ${llvm_components})
message(STATUS "Linking LLVM libs to targets: ${ARGN}")
# PUBLIC linkage: the LLVM libraries propagate to anything linking these targets.
foreach(target ${ARGN})
target_link_libraries(${target} PUBLIC ${llvm_libs})
endforeach()
endmacro()

# include_directories(${CMAKE_SOURCE_DIR}/src)

# recommended for include is to have headers in an include folder
include_directories(${CMAKE_SOURCE_DIR}/src) # can access any headers
include_directories(${CMAKE_SOURCE_DIR})
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

Expand Down Expand Up @@ -114,13 +107,19 @@ endif()

enable_testing()

set(TEST_EXECUTABLES fs_test
set(TEST_EXECUTABLES fs_test ocr_test
tesseractparallel_test bulk_test pdf_test
textractapi_tests ocr_test atomic_test similarity_test
textractapi_tests atomic_test similarity_test
threadlocal_test)

foreach(TEST_EXECUTABLE IN LISTS TEST_EXECUTABLES)
add_executable(${TEST_EXECUTABLE} tests/${TEST_EXECUTABLE}.cc src/fs.cc)

target_compile_definitions(${TEST_EXECUTABLE}
PRIVATE IMAGE_FOLDER_PATH="${IMAGE_FOLDER_PATH}"
PRIVATE INPUT_OPEN_TEST_PATH="${INPUT_OPEN_TEST_PATH}"
)

set_target_properties(${TEST_EXECUTABLE} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/tests"
)
Expand Down
27 changes: 27 additions & 0 deletions docs/cmake_vars/cmake_consts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Declaring Constants during Compilation

```bash

set(IMAGE_FOLDER_PATH "${CMAKE_CURRENT_SOURCE_DIR}/images")
set(INPUT_OPEN_TEST_PATH "${CMAKE_CURRENT_SOURCE_DIR}/images/screenshot.png")


target_compile_definitions(${TEST_EXECUTABLE}
PRIVATE IMAGE_FOLDER_PATH="${IMAGE_FOLDER_PATH}"
PRIVATE INPUT_OPEN_TEST_PATH="${INPUT_OPEN_TEST_PATH}"
)

const auto *const imgFolder = IMAGE_FOLDER_PATH;

const std::string imgFolder = IMAGE_FOLDER_PATH;

```

```cpp
namespace {
const auto *const inputOpenTest = INPUT_OPEN_TEST_PATH;

const std::string cmake_var = std::string(VAR) + "/somefile.txt";

} // namespace
```
110 changes: 104 additions & 6 deletions src/fs.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@

#include "fs.h"
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/Path.h>
Expand All @@ -24,6 +26,30 @@ auto getFilePaths(const llvm::Twine &directoryPath) -> llvm::Expected<std::vecto

return filePaths;
}

/// @brief Collect the paths of the entries found directly inside a directory.
///
/// Walks `directoryPath` with llvm::sys::fs::directory_iterator and stores the path of
/// every entry the iterator yields.
///
/// @param directoryPath Directory to iterate.
/// @return The collected entry paths, or an llvm::StringError wrapping the std::error_code
///         reported either when opening the directory or when advancing the iterator.
auto getFilePathsReal(const llvm::Twine &directoryPath)
    -> llvm::Expected<std::vector<std::string>> {
    std::error_code ERR;
    llvm::sys::fs::directory_iterator dirIt(directoryPath, ERR);

    if (ERR) {
        return llvm::make_error<llvm::StringError>(ERR.message(), ERR);
    }

    const llvm::sys::fs::directory_iterator dirEnd;
    std::vector<std::string> filePaths;

    while (dirIt != dirEnd) {
        filePaths.push_back(dirIt->path());

        // Inspect the error right after advancing. Folding `!ERR` into the loop condition
        // would end the loop on a failed increment and return a truncated list as success.
        dirIt.increment(ERR);
        if (ERR) {
            return llvm::make_error<llvm::StringError>(ERR.message(), ERR);
        }
    }

    return filePaths;
}

auto getFileInfo(const llvm::Twine &Path) -> llvm::Expected<llvm::sys::fs::file_status> {
llvm::sys::fs::file_status Status;
if (std::error_code ERR = llvm::sys::fs::status(Path, Status)) {
Expand All @@ -32,13 +58,21 @@ auto getFileInfo(const llvm::Twine &Path) -> llvm::Expected<llvm::sys::fs::file_
return Status;
}

auto createDirectories(const llvm::Twine &path) -> llvm::Expected<bool> {
/// @brief Create the directory at `path` via llvm::sys::fs::create_directories.
/// @param path Directory path to create; an empty path is rejected.
/// @return true on success, otherwise an llvm::StringError (empty path, or the error code
///         reported by the filesystem call).
auto createDirectories(const llvm::StringRef &path) -> llvm::Expected<bool> {
    // Reject an empty path up front instead of handing it to the filesystem layer.
    if (path.empty()) {
        return llvm::make_error<llvm::StringError>(
            "Empty Path passed to create directory", llvm::inconvertibleErrorCode());
    }

    const std::error_code status = llvm::sys::fs::create_directories(path);
    if (status) {
        return llvm::make_error<llvm::StringError>(status.message(), status);
    }

    return true;
}

/// @brief Create the Base Directory from a File Path if it does not Exist - returns True if
/// Created Successfully or the Dir Already Exists.
/// @param filePath
/// @return llvm::Expected<bool>
auto createDirectoryForFile(const llvm::Twine &filePath) -> llvm::Expected<bool> {
llvm::SmallString<256> fullPathStorage;
filePath.toVector(fullPathStorage);
Expand All @@ -47,17 +81,81 @@ auto createDirectoryForFile(const llvm::Twine &filePath) -> llvm::Expected<bool>

llvm::StringRef directoryPathRef = llvm::sys::path::parent_path(fullPathRef);

// Handle case when input passed is directly a Dir Path - Create Dir from filePath
if (directoryPathRef.empty()) {
llvm::errs() << "No directory part in path; assuming current directory. Consider using "
"createDirectories() instead if only a Dir Path is expected.\n";
return createDirectories(filePath.getSingleStringRef());
}

llvm::SmallString<256> directoryPath(directoryPathRef);

llvm::errs() << llvm::formatv("BaseDir Computed for: {0} -> {1}\n", filePath, directoryPathRef);

if (llvm::sys::fs::exists(directoryPath)) {
return true;
}

if (auto err = llvm::sys::fs::create_directories(directoryPath); err) {
llvm::errs() << "Error creating directory '" << directoryPath << "': " << err.message() << "\n";
llvm::errs() << "Error creating directory '" << directoryPath << "': " << err.message()
<< "\n";
return llvm::make_error<llvm::StringError>(
llvm::formatv("Error creating directory {0}: {1}", directoryPath.str(), err.message()), err);
llvm::formatv("Error creating directory {0}: {1}", directoryPath.str(), err.message()),
err);
}

return true;
}
}

/// @brief Build "<directory><sep><file name with extension>" as a string.
///
/// Only the file-name component of `fileName` is used (any leading directories are
/// dropped) and its extension is set to `extension`. The function only builds the string;
/// it does not create `directory` or touch the filesystem. The resulting path is also
/// logged to llvm::errs().
///
/// @param fileName    File name or path whose last component names the output file.
/// @param directory   Directory prefix; may be empty.
/// @param extension   Extension applied to the file name.
/// @param osSeparator Separator appended after a non-empty `directory` that does not
///                    already end with it.
/// @return The constructed path. No code path in this function returns an error.
auto createQualifiedFilePath(const llvm::StringRef &fileName,
                             const llvm::StringRef &directory,
                             const llvm::StringRef &extension = ".txt",
                             const char osSeparator) -> llvm::Expected<std::string> {
    llvm::SmallString<256> qualifiedPath;
    llvm::sys::path::append(qualifiedPath, directory);

    // Terminate a non-empty directory prefix with the requested separator.
    const bool needsSeparator = !qualifiedPath.empty() && qualifiedPath.back() != osSeparator;
    if (needsSeparator) {
        qualifiedPath.push_back(osSeparator);
    }

    // Keep just the leaf name and force the requested extension onto it.
    llvm::SmallString<256> leafName = llvm::sys::path::filename(fileName);
    llvm::sys::path::replace_extension(leafName, extension);
    llvm::sys::path::append(qualifiedPath, leafName);

    llvm::errs() << "Final constructed path: " << qualifiedPath << "\n";
    return qualifiedPath.str().str();
}

/// @brief Remove the file at `filePath` via llvm::sys::fs::remove.
/// @param filePath Path of the file to delete.
/// @return llvm::Error::success() when the call reports no error, otherwise an
///         llvm::StringError carrying the path and the reported error code.
auto deleteFile(const llvm::Twine &filePath) -> llvm::Error {
    const std::error_code status = llvm::sys::fs::remove(filePath);
    if (!status) {
        return llvm::Error::success();
    }

    return llvm::make_error<llvm::StringError>("Failed to delete file: " + filePath.str(), status);
}

/// @brief Recursively remove the directory at `directoryPath`.
///
/// A path that does not exist is treated as already deleted and reported as success.
///
/// @param directoryPath Directory to remove together with its contents.
/// @return llvm::Error::success() when the directory is gone, otherwise an
///         llvm::StringError carrying the path and the reported error code.
auto deleteDirectory(const llvm::Twine &directoryPath) -> llvm::Error {
    // Keep "nothing to delete" a success now that errors are no longer ignored below.
    if (!llvm::sys::fs::exists(directoryPath)) {
        return llvm::Error::success();
    }

    // remove_directories defaults to IgnoreErrors = true, which always yields a success
    // code and made the error branch below unreachable. Request real error reporting.
    if (auto err = llvm::sys::fs::remove_directories(directoryPath, /*IgnoreErrors=*/false)) {
        return llvm::make_error<llvm::StringError>(
            "Failed to delete directory: " + directoryPath.str(), err);
    }
    return llvm::Error::success();
}

/**
* @brief Get the Err object as a str for llvm::Expected<T>
* @param err
* @return std::string
*/
auto getErr(llvm::Error err) -> std::string { return llvm::toString(std::move(err)); }
41 changes: 40 additions & 1 deletion src/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,53 @@
#ifndef FS_H
#define FS_H

#include <llvm/ADT/SmallString.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/Path.h>

auto getFilePaths(const llvm::Twine &directoryPath) -> llvm::Expected<std::vector<std::string>>;

auto getFileInfo(const llvm::Twine &Path) -> llvm::Expected<llvm::sys::fs::file_status>;

auto createDirectories(const llvm::Twine &path) -> llvm::Expected<bool>;
auto createDirectories(const llvm::StringRef &path) -> llvm::Expected<bool>;

auto createDirectoryForFile(const llvm::Twine &filePath) -> llvm::Expected<bool>;

auto createQualifiedFilePath(const llvm::StringRef &fileName,
const llvm::StringRef &directory,
const llvm::StringRef &extension,
char osSeparator = '/') -> llvm::Expected<std::string>;

auto deleteFile(const llvm::Twine &filePath) -> llvm::Error;

auto deleteDirectory(const llvm::Twine &directoryPath) -> llvm::Error;

/**
 * @brief Delete several directories in one call.
 *
 * deleteDirectory() is invoked once per argument, in argument order. Every path is
 * attempted regardless of earlier failures, and the individual results are merged with
 * llvm::joinErrors into the single error that is returned.
 *
 * @tparam Paths Types of the path arguments; each must be accepted by deleteDirectory().
 * @param paths Directory paths to delete.
 * @return llvm::Error The merged result; a success value when no deletion reported an error.
 *
 * @code{.cpp}
 *
 * if (auto err = deleteDirectories(tempDirectory, tempBasePath, tempBaseResolvedPath)) {
 *     llvm::errs() << "Error deleting directories: " << llvm::toString(std::move(err)) << '\n';
 * }
 *
 * @endcode
 */
template <typename... Paths>
auto deleteDirectories(const Paths &...paths) -> llvm::Error {
    llvm::Error combined = llvm::Error::success();

    // Delete one directory and fold its outcome into the running result.
    const auto removeOne = [&combined](const auto &path) {
        combined = llvm::joinErrors(std::move(combined), deleteDirectory(path));
    };

    (..., removeOne(paths));
    return combined;
}

auto getErr(llvm::Error err) -> std::string;

#endif // FS_H
35 changes: 35 additions & 0 deletions src/tesseract.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

#include <leptonica/allheaders.h>
#include <llvm/Support/raw_ostream.h>
#include <tesseract/baseapi.h>
#include <tesseract/renderer.h>

void createPDF(const std::string &input_path,
const std::string &output_path,
const char *tessdata_path,
bool text_only = false) {
// const char *datapath = "/opt/homebrew/opt/tesseract/share/tessdata";

auto *api = new tesseract::TessBaseAPI();
if (api->Init(tessdata_path, "eng") != 0) {
llvm::errs() << "Error: Could not initialize Tesseract OCR API." << '\n';
llvm::errs() << "Tessdata path: " << tessdata_path << '\n';
delete api; // Don't forget to delete api in case of failure
return;
}

auto *renderer = new tesseract::TessPDFRenderer(output_path.c_str(), tessdata_path, text_only);

bool succeed = api->ProcessPages(input_path.c_str(), nullptr, 0, renderer);
if (!succeed) {
llvm::errs() << "Error: Failed to process pages." << '\n';
llvm::errs() << "Input file: " << input_path << '\n';
llvm::errs() << "Output file: " << output_path << '\n';
} else {
llvm::outs() << "PDF creation succeeded." << '\n';
}

api->End();
delete api;
delete renderer;
}
Loading

0 comments on commit 6ea283c

Please sign in to comment.