Skip to content

Commit

Permalink
feat: redesign api #38
Browse files Browse the repository at this point in the history
  • Loading branch information
rueedlinger committed Jun 11, 2024
1 parent 58b48f2 commit 670881f
Show file tree
Hide file tree
Showing 42 changed files with 1,529 additions and 1,713 deletions.
59 changes: 59 additions & 0 deletions teal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import logging
import logging.config
import os

import yaml
from fastapi import FastAPI, Request

from teal.core import default_logging_conf
from teal.core.http import (
create_json_err_response_from_exception,
CheckUnknownQueryParamsRouter,
)
from teal.core.internal import AppPrometheusAdapter, OpenApiAdapter

logger = logging.getLogger("teal.api")


def init_logging():
    """Configure the ``logging`` module for the application.

    If the ``TEAL_LOG_CONF`` environment variable is set, the file it names is
    parsed as YAML and the result is passed to ``logging.config.dictConfig``.
    Otherwise the dictionary returned by ``default_logging_conf()`` is used.

    Raises:
        OSError: if the file named by ``TEAL_LOG_CONF`` cannot be opened.
    """
    log_conf_file = os.environ.get("TEAL_LOG_CONF")

    if log_conf_file is None:
        logging.config.dictConfig(default_logging_conf())
        logger.info("logging config file not set using default")
        return

    # Open in binary mode and hand the stream to PyYAML so that it detects the
    # encoding itself instead of depending on the locale's default encoding.
    with open(log_conf_file, "rb") as f:
        config = yaml.safe_load(f)
    logging.config.dictConfig(config)
    logger.info(f"logging config loaded from {log_conf_file}")


def create_app() -> FastAPI:
    """Build and return the configured FastAPI application.

    Steps, in order: initialise logging, create the app, install a catch-all
    exception handler and a startup hook, register the routers from
    ``teal.routers``, then enable the OpenAPI and Prometheus adapters.
    """
    init_logging()

    application = FastAPI()

    # Any exception is converted into a JSON error response.
    @application.exception_handler(Exception)
    async def unicorn_exception_handler(request: Request, ex: Exception):
        return create_json_err_response_from_exception(ex)

    # NOTE(review): recent FastAPI releases prefer lifespan handlers over
    # on_event - confirm against the pinned FastAPI version.
    @application.on_event("startup")
    async def startup_event():
        logger.info("startup")

    # Imported inside the function, i.e. only after init_logging() has run
    # (presumably intentional - confirm).
    from teal.routers import extract, create, ocr, validate, internal

    # Registration order is kept as: extract, create, ocr, validate, internal.
    for router_module in (extract, create, ocr, validate, internal):
        application.include_router(router_module.router)

    OpenApiAdapter(application).enable_openapi()
    AppPrometheusAdapter(application).enable_prometheus()

    return application
320 changes: 2 additions & 318 deletions teal/api.py
Original file line number Diff line number Diff line change
@@ -1,319 +1,3 @@
import logging.config
import os
from typing import Any, List
from teal import create_app

import yaml
from fastapi import FastAPI, UploadFile, Request, Query
from fastapi.openapi.utils import get_openapi
from prometheus_fastapi_instrumentator import Instrumentator, metrics
from starlette.responses import FileResponse

from teal.core import (
is_feature_enabled,
get_version,
get_tesseract_languages,
get_app_info,
)
from teal.http import (
create_json_err_response_from_exception,
CheckUnknownQueryParamsRouter,
)
from teal.libreoffice import LibreOfficeAdapter
from teal.model import (
TextExtract,
TableExtract,
PdfAReport,
OcrPdfAProfile,
LibreOfficePdfProfile,
HealthCheck,
ValidatePdfProfile,
PdfMetaDataReport,
OcrMode,
AppInfo,
)
from teal.pdf import PdfDataExtractor, PdfMetaDataExtractor
from teal.pdfa import PdfAValidator, PdfAConverter

# Module-level application object; every route uses the custom route class.
app = FastAPI()
app.router.route_class = CheckUnknownQueryParamsRouter

logger = logging.getLogger("teal.api")
if "TEAL_LOG_CONF" in os.environ:
    log_conf_file = os.environ["TEAL_LOG_CONF"]
    # Open in binary mode and hand the stream to PyYAML so that it detects the
    # encoding itself instead of depending on the locale's default encoding.
    with open(log_conf_file, "rb") as f:
        config = yaml.safe_load(f)
    logging.config.dictConfig(config)
    logger.info(f"logging config loaded from {log_conf_file}")
else:
    # Built-in default: "teal" and "uvicorn" log at INFO to stdout, the root
    # logger at WARN.
    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "simple": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                    "datefmt": "%Y-%m-%d %H:%M:%S",
                }
            },
            "handlers": {
                "console": {
                    "class": "logging.StreamHandler",
                    "formatter": "simple",
                    "stream": "ext://sys.stdout",
                },
            },
            "loggers": {
                "teal": {"level": "INFO", "handlers": ["console"], "propagate": False},
                "uvicorn": {
                    "level": "INFO",
                    "handlers": ["console"],
                    "propagate": False,
                },
            },
            "root": {"handlers": ["console"], "level": "WARN"},
        }
    )
    logger.info("logging config file not set using default")


# Log the value returned by get_tesseract_languages() once at import time.
logger.info(f"installed tesseract languages: {get_tesseract_languages()}")


# Catch-all handler: any exception is turned into a JSON error response.
# `request` is part of the handler signature but is not used here.
@app.exception_handler(Exception)
async def unicorn_exception_handler(request: Request, ex: Exception):
    error_response = create_json_err_response_from_exception(ex)
    return error_response


# POST /pdf/text is only registered when TEAL_FEATURE_PDF_TEXT is enabled.
if is_feature_enabled("TEAL_FEATURE_PDF_TEXT"):
    logger.info("feature PDF text is enabled")

    @app.post(
        "/pdf/text",
        tags=["pdf"],
        summary="Extract text from a PDF",
        response_model=List[TextExtract],
    )
    async def extract_text_from_pdf(
        file: UploadFile,
        pages: str = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, the file name and the optional page
        # ranges to PdfDataExtractor.extract_text and returns its result.
        logger.debug(f"extract text from pdf file='{file.filename}', pages='{pages}'")
        extractor = PdfDataExtractor()
        content = await file.read()
        return extractor.extract_text(
            data=content, filename=file.filename, page_ranges=pages
        )


# POST /pdf/ocr is only registered when TEAL_FEATURE_PDF_OCR is enabled.
if is_feature_enabled("TEAL_FEATURE_PDF_OCR"):
    logger.info("feature PDF ocr is enabled")

    @app.post(
        "/pdf/ocr",
        tags=["pdf"],
        summary="Extract text with OCR from a PDF",
        response_model=List[TextExtract],
    )
    async def extract_text_with_ocr_from_pdf(
        file: UploadFile,
        languages: List[str] = Query([]),
        pages: str = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, file name, requested languages and
        # optional page ranges to PdfDataExtractor.extract_text_with_ocr.
        logger.debug(
            f"extract text with ocr from pdf file='{file.filename}', languages='{languages}', pages='{pages}'"
        )
        extractor = PdfDataExtractor()
        content = await file.read()
        return extractor.extract_text_with_ocr(
            data=content,
            filename=file.filename,
            langs=languages,
            page_ranges=pages,
        )


# POST /pdf/table is only registered when TEAL_FEATURE_PDF_TABLE is enabled.
if is_feature_enabled("TEAL_FEATURE_PDF_TABLE"):
    logger.info("feature PDF table is enabled")

    @app.post(
        "/pdf/table",
        tags=["pdf"],
        summary="Extract tables from a PDF",
        response_model=List[TableExtract],
    )
    async def extract_table_from_pdf(
        file: UploadFile,
        pages: str = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, the file name and the optional page
        # ranges to PdfDataExtractor.extract_table and returns its result.
        logger.debug(f"extract table from pdf file='{file.filename}', pages='{pages}'")
        extractor = PdfDataExtractor()
        content = await file.read()
        return extractor.extract_table(
            data=content, filename=file.filename, page_ranges=pages
        )


# POST /pdf/meta is only registered when TEAL_FEATURE_PDF_META is enabled.
if is_feature_enabled("TEAL_FEATURE_PDF_META"):
    logger.info("feature PDF meta data is enabled")

    # NOTE(review): this handler reuses the name `extract_text_from_pdf`, which
    # is also the name of the /pdf/text handler; when both features are enabled
    # the module-level name is rebound. The name is kept to preserve the
    # existing interface - consider renaming.
    @app.post(
        "/pdf/meta",
        tags=["pdf"],
        summary="Extract metadata from a PDF",
        response_model=PdfMetaDataReport,
    )
    async def extract_text_from_pdf(
        file: UploadFile,
    ) -> Any:
        # Hands the uploaded bytes and the file name to
        # PdfMetaDataExtractor.extract_meta_data and returns its result.
        logger.debug(f"extract meta data from pdf file='{file.filename}'")
        extractor = PdfMetaDataExtractor()
        content = await file.read()
        return extractor.extract_meta_data(data=content, filename=file.filename)


# POST /pdfa/convert is only registered when TEAL_FEATURE_PDFA_CONVERT is enabled.
if is_feature_enabled("TEAL_FEATURE_PDFA_CONVERT"):
    logger.info("feature PDF/A convert is enabled")

    @app.post(
        "/pdfa/convert",
        summary="Convert PDF documents to PDF/A",
        response_class=FileResponse,
        tags=["pdfa"],
    )
    async def convert_pdf_to_pdfa_with_ocr(
        file: UploadFile,
        languages: List[str] = Query(default=None),
        profile: OcrPdfAProfile = Query(default=None),
        ocr: OcrMode = Query(default=None),
        pages: str = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, file name, languages, PDF/A profile, OCR
        # mode and optional page ranges to PdfAConverter.convert_pdfa.
        # The debug message previously read "extract table from pdf" and was
        # missing the closing quote after the languages value.
        logger.debug(
            f"convert pdf to pdf/a file='{file.filename}', languages='{languages}', profile='{profile}', ocr={ocr}, pages='{pages}'"
        )
        pdf = PdfAConverter()
        return pdf.convert_pdfa(
            data=await file.read(),
            filename=file.filename,
            langs=languages,
            pdfa_profile=profile,
            ocr_mode=ocr,
            page_ranges=pages,
        )


# POST /pdfa/validate is only registered when TEAL_FEATURE_PDFA_VALIDATE is enabled.
if is_feature_enabled("TEAL_FEATURE_PDFA_VALIDATE"):
    logger.info("feature PDF/A validate is enabled")

    @app.post(
        "/pdfa/validate",
        summary="Validate PDF documents for PDF/A compliance",
        response_model=PdfAReport,
        tags=["pdfa"],
    )
    async def validate_pdfa(
        file: UploadFile,
        profile: ValidatePdfProfile = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, the file name and the optional profile to
        # PdfAValidator.validate_pdf and returns its result.
        # The debug message previously read "extract table from pdf".
        logger.debug(f"validate pdf/a file='{file.filename}', profile='{profile}'")
        pdf = PdfAValidator()
        return pdf.validate_pdf(
            data=await file.read(), filename=file.filename, profile=profile
        )


# POST /libreoffice/convert is only registered when
# TEAL_FEATURE_LIBREOFFICE_CONVERT is enabled.
if is_feature_enabled("TEAL_FEATURE_LIBREOFFICE_CONVERT"):
    logger.info("feature libreoffice convert is enabled")

    @app.post(
        "/libreoffice/convert",
        tags=["libreoffice"],
        summary="Convert LibreOffice documents to PDF or PDF/A",
        response_class=FileResponse,
    )
    async def convert_libreoffice_docs_to_pdf(
        file: UploadFile,
        profile: LibreOfficePdfProfile = Query(default=None),
        pages: str = Query(default=None),
    ) -> Any:
        # Hands the uploaded bytes, file name, PDF profile and optional page
        # ranges to LibreOfficeAdapter.convert_to_pdf and returns its result.
        logger.debug(
            f"libreoffice convert to pdf file='{file.filename}', profile={profile}, pages='{pages}'"
        )
        adapter = LibreOfficeAdapter()
        content = await file.read()
        return adapter.convert_to_pdf(
            data=content,
            filename=file.filename,
            pdf_profile=profile,
            page_ranges=pages,
        )


# GET /app/health is only registered when TEAL_FEATURE_APP_HEALTH is enabled.
if is_feature_enabled("TEAL_FEATURE_APP_HEALTH"):

    @app.get(
        "/app/health",
        response_model=HealthCheck,
        summary="Health Check",
        tags=["app"],
    )
    def get_health() -> HealthCheck:
        # Always reports a constant "OK" status.
        health = HealthCheck(status="OK")
        return health


# GET /app/info is only registered when TEAL_FEATURE_APP_INFO is enabled.
if is_feature_enabled("TEAL_FEATURE_APP_INFO"):

    # NOTE(review): this handler reuses the name `get_health`, which is also
    # the name of the /app/health handler; when both features are enabled the
    # module-level name is rebound. The name is kept to preserve the existing
    # interface - consider renaming.
    @app.get(
        "/app/info",
        response_model=AppInfo,
        summary="Application information's",
        tags=["app"],
    )
    def get_health() -> AppInfo:
        # Returns whatever get_app_info() reports.
        info = get_app_info()
        return info


def custom_openapi():
    """Return the application's OpenAPI schema, building it on first use.

    A schema already stored on ``app.openapi_schema`` is returned as is.
    Otherwise the schema is generated with ``get_openapi`` from the app's
    routes, given an ``x-logo`` entry under ``info``, stored on the app and
    returned.
    """
    # Serve the cached schema if one has already been generated.
    if app.openapi_schema:
        return app.openapi_schema

    tag_descriptions = {
        "pdf": "Extract text, perform OCR, or extract tables from PDFs.",
        "pdfa": "Convert PDF to PDF/A and validate PDF/A compliance.",
        "libreoffice": "Convert LibreOffice documents to PDF.",
        "app": "Application information.",
    }
    tags_metadata = [
        {"name": tag, "description": text} for tag, text in tag_descriptions.items()
    ]

    schema = get_openapi(
        title="teal",
        version=get_version(),
        summary="A convenient REST API for working with PDF's",
        description="**teal** aims to provide a user-friendly API for working with PDFs which can be easily integrated in an existing workflow. ",
        routes=app.routes,
        tags=tags_metadata,
    )
    schema["info"]["x-logo"] = {
        "url": "https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png"
    }

    app.openapi_schema = schema
    return app.openapi_schema


# Replace the app's schema generator with custom_openapi.
app.openapi = custom_openapi

# Prometheus metrics are only wired up when TEAL_FEATURE_APP_METRICS is enabled.
# The statement order below (instrument/expose first, then add) is kept as is.
if is_feature_enabled("TEAL_FEATURE_APP_METRICS"):
    instrumentator = Instrumentator(
        excluded_handlers=["/app/*", "/docs/*", "/openapi.json"],
    )
    instrumentator.instrument(app).expose(
        app,
        tags=["app"],
        include_in_schema=True,
        endpoint="/app/metrics",
    )
    instrumentator.add(metrics.requests())
# NOTE(review): this rebinds `app` to the object returned by create_app(),
# replacing the instance configured above - confirm this is intended.
app = create_app()
Loading

0 comments on commit 670881f

Please sign in to comment.