From 7cb64669d55ca36fe760e86f34189ee706359da3 Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
The team gave an introduction to the goals and public communication channels of OCR-D in Phase III, to the status and plans of the OCR software and the Web API, and to the handling of ground truth data in OCR-D. In addition, the coordination project gave an insight into the software development practice in OCR-D to date and the opportunities to contribute.
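The diff below rewrites entries in docs/js/ocrd-all-tool.json, the aggregated registry of ocrd-tool.json descriptions (executable, categories, steps, input/output fileGrps, and a JSON-Schema-like parameter block per processor). As a minimal illustrative sketch of how such a registry can be consumed — not part of this patch — the following Python snippet loads the file and lists one processor's parameters; the registry path and the executable name ocrd-cor-asv-ann-process are taken from the diff, everything else is assumption.

```python
# Illustrative sketch only (not part of this patch): inspect one processor's
# parameter schema from the aggregated registry docs/js/ocrd-all-tool.json.
import json

REGISTRY = "docs/js/ocrd-all-tool.json"   # path as used in this repository
EXECUTABLE = "ocrd-cor-asv-ann-process"   # one of the entries in the diff below

with open(REGISTRY, encoding="utf-8") as f:
    tools = json.load(f)

tool = tools[EXECUTABLE]
print(tool["description"])
print("steps:", ", ".join(tool["steps"]))

# Each parameter carries type, default, enum, required, etc.
for name, spec in tool["parameters"].items():
    default = spec.get("default", "<required>" if spec.get("required") else "<none>")
    print(f"  {name}: type={spec['type']}, default={default!r}")
```

Run against the updated file, this would show, for example, that model_file is required and that textequiv_level defaults to "glyph" for the cor-asv-ann post-correction processor.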
diff --git a/docs/js/ocrd-all-tool.json b/docs/js/ocrd-all-tool.json index a755f3715..1d14644b8 100644 --- a/docs/js/ocrd-all-tool.json +++ b/docs/js/ocrd-all-tool.json @@ -1,441 +1,696 @@ { - "ocrd-anybaseocr-binarize": { + "ocrd-cor-asv-ann-process": { + "executable": "ocrd-cor-asv-ann-process", "categories": [ - "Image preprocessing" + "Text recognition and optimization" ], - "description": "Binarizes images with the algorithm from ocropy and outputs it as an AlternativeImage.", - "executable": "ocrd-anybaseocr-binarize", + "steps": [ + "recognition/post-correction" + ], + "description": "Improve text annotation by character-level encoder-attention-decoder ANN model", "input_file_grp": [ - "OCR-D-IMG" + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY" ], "output_file_grp": [ - "OCR-D-IMG-BIN" + "OCR-D-COR-ASV" ], "parameters": { - "bignore": { - "default": 0.1, - "description": "ignore this much of the border for threshold estimation", - "format": "float", - "type": "number" - }, - "debug": { - "default": 0, - "description": "display intermediate results", - "format": "integer", - "type": "number" - }, - "escale": { - "default": 1.0, - "description": "scale for estimating a mask over the text region", - "format": "float", - "type": "number" + "model_file": { + "type": "string", + "format": "uri", + "content-type": "application/x-hdf;subtype=bag", + "description": "path of h5py weight/config file for model trained with cor-asv-ann-train", + "required": true, + "cacheable": true }, - "gray": { - "default": false, - "description": "force grayscale processing even if image seems binary", - "type": "boolean" + "textequiv_level": { + "type": "string", + "enum": [ + "line", + "word", + "glyph" + ], + "default": "glyph", + "description": "PAGE XML hierarchy level to read/write TextEquiv input/output on" }, - "hi": { - "default": 90, - "description": "percentile for white estimation", - "format": "float", - "type": "number" + "charmap": { + "type": "object", + "default": {}, + "description": "mapping for input characters before passing to correction; can be used to adapt to character set mismatch between input and model (without relying on underspecification alone)" }, - "lo": { - "default": 5, - "description": "percentile for black estimation", + "rejection_threshold": { + "type": "number", "format": "float", - "type": "number" - }, - "nocheck": { - "default": false, - "description": "disable error checking on inputs", - "type": "boolean" - }, - "operation_level": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region", - "line" - ], - "type": "string" + "default": 0.5, + "description": "minimum probability of the candidate corresponding to the input character in each hypothesis during beam search, helps balance precision/recall trade-off; set to 0 to disable rejection (max recall) or 1 to disable correction (max precision)" }, - "perc": { - "default": 80, - "description": "percentage for filters", + "relative_beam_width": { + "type": "number", "format": "float", - "type": "number" + "default": 0.2, + "description": "minimum fraction of the best candidate's probability required to enter the beam in each hypothesis; controls the quality/performance trade-off" }, - "range": { - "default": 20, - "description": "range for filters", + "fixed_beam_width": { + "type": "number", "format": "integer", - "type": "number" - }, - "raw_copy": { - "default": false, - "description": "also copy the raw 
image", - "type": "boolean" + "default": 15, + "description": "maximum number of candidates allowed to enter the beam in each hypothesis; controls the quality/performance trade-off" }, - "show": { + "fast_mode": { + "type": "boolean", "default": false, - "description": "display final results", - "type": "boolean" - }, - "threshold": { - "default": 0.5, - "description": "threshold, determines lightness", - "format": "float", - "type": "number" - }, - "zoom": { - "default": 0.5, - "description": "zoom for page background estimation, smaller=faster", - "format": "float", - "type": "number" + "description": "decode greedy instead of beamed, with batches of parallel lines instead of parallel alternatives; also disables rejection and beam parameters; enable if performance is far more important than quality" } - }, - "steps": [ - "preprocessing/optimization/binarization" - ] + } }, - "ocrd-anybaseocr-block-segmentation": { + "ocrd-cor-asv-ann-evaluate": { + "executable": "ocrd-cor-asv-ann-evaluate", "categories": [ - "Layout analysis" + "Text recognition and optimization" ], - "description": "Segments and classifies regions in each single page and annotates the the region polygons and classes.", - "executable": "ocrd-anybaseocr-block-segmentation", + "steps": [ + "recognition/evaluation" + ], + "description": "Align different textline annotations and compute distance", "input_file_grp": [ - "OCR-D-IMG" + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-EVAL-CER" ], "parameters": { - "active_classes": { - "default": [ - "page-number", - "paragraph", - "catch-word", - "heading", - "drop-capital", - "signature-mark", - "marginalia", - "caption" + "metric": { + "type": "string", + "enum": [ + "Levenshtein-fast", + "Levenshtein", + "NFC", + "NFKC", + "historic_latin" ], - "description": "Restrict types of regions to be detected.", - "items": { - "enum": [ - "page-number", - "paragraph", - "catch-word", - "heading", - "drop-capital", - "signature-mark", - "header", - "marginalia", - "footnote", - "footnote-continued", - "caption", - "endnote", - "footer", - "keynote", - "image", - "table", - "graphics" - ], - "type": "string" - }, - "type": "array" - }, - "block_segmentation_weights": { - "cacheable": true, - "content-type": "application/x-hdf;subtype=bag", - "default": "block_segmentation_weights.h5", - "description": "Path to model weights", - "format": "uri", - "type": "string" - }, - "min_confidence": { - "default": 0.9, - "description": "Confidence threshold for region detections", - "format": "float", - "type": "number" - }, - "min_iou_drop": { - "default": 0.8, - "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to suppress prediction scoring worse", - "format": "float", - "type": "number" - }, - "min_iou_merge": { - "default": 0.2, - "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to merge prediction scoring worse", - "format": "float", - "type": "number" + "default": "Levenshtein-fast", + "description": "Distance metric to calculate and aggregate: `historic_latin` for GT level 1-3, `NFKC` for roughly GT level 2 (but including reduction of `\u017f/s` and superscript numerals etc), `Levenshtein` for GT level 3 (or `Levenshtein-fast` for faster alignment - but using maximum sequence length instead of path length as CER 
denominator, and without confusion statistics)." }, - "min_share_drop": { - "default": 0.9, - "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to suppress smaller prediction", - "format": "float", - "type": "number" + "gt_level": { + "type": "number", + "enum": [ + 1, + 2, + 3 + ], + "default": 1, + "description": "When `metric=historic_latin`, normalize and equate at this GT transcription level." }, - "min_share_merge": { - "default": 0.8, - "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to merge smaller prediction", - "format": "float", - "type": "number" + "confusion": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 0, + "description": "Count edits and show that number of most frequent confusions (non-identity) in the end." }, - "overwrite": { + "histogram": { + "type": "boolean", "default": false, - "description": "whether to delete existing text lines prior to segmentation", - "type": "boolean" - }, - "post_process": { - "default": true, - "description": "whether to apply non-maximum suppression (across classes) on the detections", - "type": "boolean" - }, - "th": { - "default": 15, - "description": "num of pixels to include in the area region (when applying text/non-text mask from tiseg)", - "type": "integer" - }, - "use_masks": { - "default": true, - "description": "whether to segment from the mask as polygon instead of just the bbox", - "type": "boolean" + "description": "Aggregate and show mutual character histograms." } - }, + } + }, + "ocrd-cor-asv-ann-align": { + "executable": "ocrd-cor-asv-ann-align", + "categories": [ + "Text recognition and optimization" + ], "steps": [ - "layout/segmentation/region" - ] + "recognition/post-correction" + ], + "description": "Align different textline annotations and pick best", + "input_file_grp": [ + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" + ], + "output_file_grp": [ + "OCR-D-OCR-MULTI" + ], + "parameters": { + "method": { + "type": "string", + "enum": [ + "majority", + "confidence", + "combined" + ], + "default": "majority", + "description": "decide by majority of OCR hypotheses, by highest confidence of OCRs or by a combination thereof" + } + } }, - "ocrd-anybaseocr-crop": { + "ocrd-cor-asv-ann-join": { + "executable": "ocrd-cor-asv-ann-join", "categories": [ - "Image preprocessing" + "Text recognition and optimization" ], - "description": "Detect the input images' page frame, annotate it as border polygon and add a cropped derived image.", - "executable": "ocrd-anybaseocr-crop", + "steps": [ + "recognition/post-correction" + ], + "description": "Join different textline annotations by concatenation", "input_file_grp": [ - "OCR-D-IMG-DESKEW" + "OCR-D-GT-SEG-LINE", + "OCR-D-OCR-TESS", + "OCR-D-OCR-KRAK", + "OCR-D-OCR-OCRO", + "OCR-D-OCR-CALA", + "OCR-D-OCR-ANY", + "OCR-D-COR-ASV" ], "output_file_grp": [ - "OCR-D-IMG-CROP" + "OCR-D-OCR-MULTI" ], "parameters": { - "columnAreaMin": { - "default": 0.05, - "description": "text block detection: minimum area of individual columns (as ratio of total image pixels)", - "format": "float", - "type": "number" - }, - "columnSepWidthMax": { - "default": 0.04, - "description": "text block detection: maximum width between individual columns (as ratio of total image width)", - "format": "float", - "type": "number" + "add-filegrp-comments": { + "type": 
"boolean", + "default": false, + "description": "set @comments of each TextEquiv to the fileGrp it came from" + } + } + }, + "ocrd-cor-asv-ann-mark": { + "executable": "ocrd-cor-asv-ann-mark", + "description": "mark words not found by a spellchecker", + "steps": [ + "recognition/post-correction" + ], + "categories": [ + "Text recognition and optimization" + ], + "parameters": { + "command": { + "type": "string", + "required": true, + "description": "external tool to query word forms, e.g. 'hunspell -i utf-8 -d de_DE,en_US -w'" }, - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (used to zoom/scale during processing; overrides any meta-data in the images); disabled when zero or negative", - "format": "float", - "type": "number" + "normalization": { + "type": "object", + "default": {}, + "description": "mapping of characters prior to spellcheck, e.g. {'\u017f': 's', 'a\u0364': '\u00e4'}" }, - "marginBottom": { - "default": 0.75, - "description": "ruler / edge / text detection: minimum y position to crop from below (as ratio of total image height)", - "format": "float", - "type": "number" + "format": { + "type": "string", + "default": "conf", + "description": "how unknown words should be marked; if 'conf', then writes @conf=0.123, otherwise writes that value into @comments" + } + } + }, + "ocrd-dummy": { + "executable": "ocrd-dummy", + "description": "Bare-bones processor creates PAGE-XML and optionally copies file from input group to output group", + "steps": [ + "preprocessing/optimization" + ], + "categories": [ + "Image preprocessing" + ], + "input_file_grp": "DUMMY_INPUT", + "output_file_grp": "DUMMY_OUTPUT", + "parameters": { + "copy_files": { + "type": "boolean", + "default": false, + "description": "Whether to actually copy files (true) or just create PAGE-XML as a side effect (false)" + } + } + }, + "ocrd-dinglehopper": { + "executable": "ocrd-dinglehopper", + "description": "Evaluate OCR text against ground truth with dinglehopper", + "input_file_grp": [ + "OCR-D-GT-PAGE", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-OCR-EVAL" + ], + "categories": [ + "Quality assurance" + ], + "steps": [ + "recognition/text-recognition" + ], + "parameters": { + "metrics": { + "type": "boolean", + "default": true, + "description": "Enable/disable metrics and green/red" }, - "marginLeft": { - "default": 0.3, - "description": "ruler / edge / text detection: maximum x position to crop from left (as ratio of total image width)", - "format": "float", - "type": "number" + "textequiv_level": { + "type": "string", + "enum": [ + "region", + "line" + ], + "default": "region", + "description": "PAGE XML hierarchy level to extract the text from" + } + } + }, + "ocrd-docstruct": { + "executable": "ocrd-docstruct", + "categories": [ + "Layout analysis" + ], + "description": "Parsing page-level text regions with headings and reading order, create a dummy logical structMap", + "steps": [ + "layout/analysis" + ], + "parameters": { + "mode": { + "type": "string", + "enum": [ + "enmap", + "dfg" + ], + "default": "dfg", + "description": "representational convention to use in the METS; either ENMAP profile (using mets:area) or DFG profile (using only mets:structLink)" }, - "marginRight": { - "default": 0.7, - "description": "ruler / edge / text detection: minimum x position to crop from right (as ratio of total image width)", - "format": "float", - "type": "number" + "type": { + "type": "string", + "enum": [ + "chapter", + "section", + "article" + ], + "default": "article", + 
"description": "mets:div type to use for headings" + } + } + }, + "ocrd-eynollah-segment": { + "executable": "ocrd-eynollah-segment", + "categories": [ + "Layout analysis" + ], + "description": "Segment page into regions and lines and do reading order detection with eynollah", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "parameters": { + "models": { + "type": "string", + "format": "file", + "content-type": "text/directory", + "cacheable": true, + "description": "Path to directory containing models to be used (See https://qurator-data.de/eynollah)", + "required": true }, - "marginTop": { - "default": 0.25, - "description": "ruler / edge / text detection: maximum y position to crop from above (as ratio of total image height)", + "dpi": { + "type": "number", "format": "float", - "type": "number" - }, - "padding": { - "default": 10, - "description": "extend / shrink border resulting from edge detection / text detection by this many px in each direction", - "format": "integer", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images); ignored if <= 0 (with fall-back 230)", + "default": 0 }, - "rulerAreaMax": { - "default": 0.3, - "description": "ruler detection and suppression: maximum area of bbox (as ratio of total image pixels)", - "format": "float", - "type": "number" + "full_layout": { + "type": "boolean", + "default": true, + "description": "Try to detect all element subtypes, including drop-caps and headings" }, - "rulerAreaMin": { - "default": 0.01, - "description": "ruler detection and suppression: minimum area of bbox (as ratio of total image pixels)", - "format": "float", - "type": "number" + "tables": { + "type": "boolean", + "default": false, + "description": "Try to detect table regions" }, - "rulerRatioMax": { - "default": 50.0, - "description": "ruler detection and suppression: maximum aspect ratio of bbox", - "format": "float", - "type": "number" + "curved_line": { + "type": "boolean", + "default": false, + "description": "try to return contour of textlines instead of just rectangle bounding box. 
Needs more processing time" }, - "rulerRatioMin": { - "default": 3.0, - "description": "ruler detection and suppression: minimum aspect ratio of bbox", - "format": "float", - "type": "number" + "allow_scaling": { + "type": "boolean", + "default": false, + "description": "check the resolution against the number of detected columns and if needed, scale the image up or down during layout detection (heuristic to improve quality and performance)" }, - "rulerWidthMax": { - "default": 0.95, - "description": "ruler detection and suppression: maximum width of bbox (as ratio of total image width)", - "format": "float", - "type": "number" + "headers_off": { + "type": "boolean", + "default": false, + "description": "ignore the special role of headings during reading order detection" } }, - "steps": [ - "preprocessing/optimization/cropping" + "resources": [ + { + "description": "models for eynollah (TensorFlow format)", + "url": "https://qurator-data.de/eynollah/2021-04-25/SavedModel.tar.gz", + "name": "default", + "size": 1483106598, + "type": "archive", + "path_in_archive": "default" + } ] }, - "ocrd-anybaseocr-deskew": { + "ocrd-nmalign-merge": { + "executable": "ocrd-nmalign-merge", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/post-correction" + ], + "description": "forced alignment of lists of string by fuzzy string matching", + "parameters": { + "normalization": { + "type": "object", + "default": {}, + "additionalProperties": { + "type": "string" + }, + "description": "replacement pairs (regex patterns and regex backrefs) to be applied prior to matching (but not on the result itself)" + }, + "allow_splits": { + "type": "boolean", + "default": false, + "description": "allow line strings of the first input fileGrp to be matched by multiple line strings of the second input fileGrp (so concatenate all the latter before inserting into the former)" + } + } + }, + "ocrd-anybaseocr-binarize": { + "executable": "ocrd-anybaseocr-binarize", + "description": "Binarizes images with the algorithm from ocropy and outputs it as an AlternativeImage.", "categories": [ "Image preprocessing" ], - "description": "Deskews images with the algorithm from ocropy and outputs a deskew angle.", - "executable": "ocrd-anybaseocr-deskew", + "steps": [ + "preprocessing/optimization/binarization" + ], "input_file_grp": [ - "OCR-D-IMG-BIN" + "OCR-D-IMG" ], "output_file_grp": [ - "OCR-D-IMG-DESKEW" + "OCR-D-IMG-BIN" ], "parameters": { + "nocheck": { + "type": "boolean", + "default": false, + "description": "disable error checking on inputs" + }, + "show": { + "type": "boolean", + "default": false, + "description": "display final results" + }, + "raw_copy": { + "type": "boolean", + "default": false, + "description": "also copy the raw image" + }, + "gray": { + "type": "boolean", + "default": false, + "description": "force grayscale processing even if image seems binary" + }, "bignore": { - "default": 0.1, - "description": "ignore this much of the border for threshold estimation", + "type": "number", "format": "float", - "type": "number" + "default": 0.1, + "description": "ignore this much of the border for threshold estimation" }, "debug": { - "default": 0, - "description": "display intermediate results", + "type": "number", "format": "integer", - "type": "number" + "default": 0, + "description": "display intermediate results" }, "escale": { - "default": 1.0, - "description": "scale for estimating a mask over the text region", + "type": "number", "format": "float", - "type": "number" + 
"default": 1.0, + "description": "scale for estimating a mask over the text region" }, "hi": { + "type": "number", + "format": "float", "default": 90, - "description": "percentile for white estimation", - "format": "integer", - "type": "number" + "description": "percentile for white estimation" }, "lo": { + "type": "number", + "format": "float", "default": 5, - "description": "percentile for black estimation", + "description": "percentile for black estimation" + }, + "perc": { + "type": "number", + "format": "float", + "default": 80, + "description": "percentage for filters" + }, + "range": { + "type": "number", "format": "integer", - "type": "number" + "default": 20, + "description": "range for filters" }, - "maxskew": { - "default": 1.0, - "description": "skew angle estimation parameters (degrees)", + "threshold": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "threshold, determines lightness" + }, + "zoom": { + "type": "number", "format": "float", - "type": "number" + "default": 0.5, + "description": "zoom for page background estimation, smaller=faster" }, "operation_level": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", + "type": "string", "enum": [ "page", "region", "line" ], - "type": "string" - }, - "parallel": { - "default": 0, - "description": "???", - "format": "integer", - "type": "number" - }, - "skewsteps": { - "default": 8, - "description": "steps for skew angle estimation (per degree)", - "format": "integer", - "type": "number" - }, - "threshold": { - "default": 0.5, - "description": "threshold, determines lightness", - "format": "float", - "type": "number" + "default": "page", + "description": "PAGE XML hierarchy level to operate on" } - }, - "steps": [ - "preprocessing/optimization/deskewing" - ] + } }, - "ocrd-anybaseocr-dewarp": { + "ocrd-anybaseocr-deskew": { + "executable": "ocrd-anybaseocr-deskew", + "description": "Deskews images with the algorithm from ocropy and outputs a deskew angle.", "categories": [ "Image preprocessing" ], - "description": "Dewarps the input image with anyBaseOCR and outputs it as an AlternativeImage", - "executable": "ocrd-anybaseocr-dewarp", + "steps": [ + "preprocessing/optimization/deskewing" + ], "input_file_grp": [ - "OCR-D-IMG-CROP" + "OCR-D-IMG-BIN" ], "output_file_grp": [ - "OCR-D-IMG-DEWARP" + "OCR-D-IMG-DESKEW" ], "parameters": { - "gpu_id": { - "default": -1, - "description": "CUDA device ID of GPU to use, or -1 for CPU only", - "format": "integer", - "type": "number" + "escale": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "scale for estimating a mask over the text region" }, - "model_path": { - "cacheable": true, - "content-type": "application/vnd.pytorch", - "default": "latest_net_G.pth", - "description": "Path to the trained pix2pixHD model", - "format": "uri", - "type": "string" + "bignore": { + "type": "number", + "format": "float", + "default": 0.1, + "description": "ignore this much of the border for threshold estimation" + }, + "threshold": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "threshold, determines lightness" + }, + "maxskew": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "skew angle estimation parameters (degrees)" + }, + "skewsteps": { + "type": "number", + "format": "integer", + "default": 8, + "description": "steps for skew angle estimation (per degree)" + }, + "debug": { + "type": "number", + "format": "integer", + "default": 0, + "description": 
"display intermediate results" + }, + "parallel": { + "type": "number", + "format": "integer", + "default": 0, + "description": "???" + }, + "lo": { + "type": "number", + "format": "integer", + "default": 5, + "description": "percentile for black estimation" + }, + "hi": { + "type": "number", + "format": "integer", + "default": 90, + "description": "percentile for white estimation" }, "operation_level": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on (should match what model was trained on!)", + "type": "string", "enum": [ "page", - "region" + "region", + "line" ], - "type": "string" + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + } + } + }, + "ocrd-anybaseocr-crop": { + "executable": "ocrd-anybaseocr-crop", + "description": "Detect the input images' page frame, annotate it as border polygon and add a cropped derived image.", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/cropping" + ], + "input_file_grp": [ + "OCR-D-IMG-DESKEW" + ], + "output_file_grp": [ + "OCR-D-IMG-CROP" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "default": 0, + "description": "pixel density in dots per inch (used to zoom/scale during processing; overrides any meta-data in the images); disabled when zero or negative" }, - "resize_height": { - "default": 1024, - "description": "target image height before input to the network", - "format": "integer", - "type": "number" + "rulerRatioMax": { + "type": "number", + "format": "float", + "default": 50.0, + "description": "ruler detection and suppression: maximum aspect ratio of bbox" + }, + "rulerRatioMin": { + "type": "number", + "format": "float", + "default": 3.0, + "description": "ruler detection and suppression: minimum aspect ratio of bbox" + }, + "rulerAreaMax": { + "type": "number", + "format": "float", + "default": 0.3, + "description": "ruler detection and suppression: maximum area of bbox (as ratio of total image pixels)" + }, + "rulerAreaMin": { + "type": "number", + "format": "float", + "default": 0.01, + "description": "ruler detection and suppression: minimum area of bbox (as ratio of total image pixels)" + }, + "rulerWidthMax": { + "type": "number", + "format": "float", + "default": 0.95, + "description": "ruler detection and suppression: maximum width of bbox (as ratio of total image width)" + }, + "columnAreaMin": { + "type": "number", + "format": "float", + "default": 0.05, + "description": "text block detection: minimum area of individual columns (as ratio of total image pixels)" + }, + "columnSepWidthMax": { + "type": "number", + "format": "float", + "default": 0.04, + "description": "text block detection: maximum width between individual columns (as ratio of total image width)" + }, + "marginTop": { + "type": "number", + "format": "float", + "default": 0.25, + "description": "ruler / edge / text detection: maximum y position to crop from above (as ratio of total image height)" + }, + "marginBottom": { + "type": "number", + "format": "float", + "default": 0.75, + "description": "ruler / edge / text detection: minimum y position to crop from below (as ratio of total image height)" + }, + "marginLeft": { + "type": "number", + "format": "float", + "default": 0.3, + "description": "ruler / edge / text detection: maximum x position to crop from left (as ratio of total image width)" + }, + "marginRight": { + "type": "number", + "format": "float", + "default": 0.7, + "description": "ruler / edge / text detection: minimum x 
position to crop from right (as ratio of total image width)" }, + "padding": { + "type": "number", + "format": "integer", + "default": 10, + "description": "extend / shrink border resulting from edge detection / text detection by this many px in each direction" + } + } + }, + "ocrd-anybaseocr-dewarp": { + "executable": "ocrd-anybaseocr-dewarp", + "description": "Dewarps the input image with anyBaseOCR and outputs it as an AlternativeImage", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/dewarping" + ], + "input_file_grp": [ + "OCR-D-IMG-CROP" + ], + "output_file_grp": [ + "OCR-D-IMG-DEWARP" + ], + "parameters": { "resize_mode": { - "default": "resize_and_crop", - "description": "transformation to apply to the original image before input to the network", + "type": "string", "enum": [ "resize_and_crop", "crop", @@ -443,64 +698,96 @@ "scale_width_and_crop", "none" ], - "type": "string" + "default": "resize_and_crop", + "description": "transformation to apply to the original image before input to the network" + }, + "resize_height": { + "type": "number", + "format": "integer", + "default": 1024, + "description": "target image height before input to the network" }, "resize_width": { + "type": "number", + "format": "integer", "default": 1024, - "description": "target image width before input to the network", + "description": "target image width before input to the network" + }, + "model_path": { + "type": "string", + "format": "uri", + "default": "latest_net_G.pth", + "description": "Path to the trained pix2pixHD model", + "cacheable": true, + "content-type": "application/vnd.pytorch" + }, + "gpu_id": { + "type": "number", "format": "integer", - "type": "number" + "default": -1, + "description": "CUDA device ID of GPU to use, or -1 for CPU only" + }, + "operation_level": { + "type": "string", + "enum": [ + "page", + "region" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on (should match what model was trained on!)" } }, - "steps": [ - "preprocessing/optimization/dewarping" + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/dewarping/latest_net_G.pth", + "name": "latest_net_G.pth", + "description": "dewarping model for anybaseocr", + "size": 805292230 + } ] }, - "ocrd-anybaseocr-layout-analysis": { - "categories": [ - "Layout analysis" - ], - "description": "Generates a table-of-content like document structure of the whole document.", - "executable": "ocrd-anybaseocr-layout-analysis", + "ocrd-anybaseocr-tiseg": { + "executable": "ocrd-anybaseocr-tiseg", "input_file_grp": [ "OCR-D-IMG-CROP" ], "output_file_grp": [ - "OCR-D-SEG-LAYOUT" + "OCR-D-SEG-TISEG" + ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/text-nontext" ], + "description": "Separates the text and non-text elements with anyBaseOCR. Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.", "parameters": { - "batch_size": { - "default": 4, - "description": "Batch size for generating test images", - "format": "integer", - "type": "number" + "use_deeplr": { + "type": "boolean", + "default": true, + "description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology)." 
}, - "class_mapping_path": { - "cacheable": true, - "content-type": "application/python-pickle", - "default": "mapping_densenet.pickle", - "description": "File path to layout structure classes", + "seg_weights": { + "type": "string", "format": "uri", - "type": "string" - }, - "model_path": { - "cacheable": true, "content-type": "text/directory", - "default": "structure_analysis", - "description": "Directory path to layout structure classification model", - "format": "uri", - "type": "string" + "cacheable": true, + "default": "seg_model", + "description": "Directory path to deep learning model when use_deeplr is true." } }, - "steps": [ - "layout/analysis" + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/seg_model.tar.gz", + "name": "seg_model", + "description": "text image segmentation model for anybaseocr", + "type": "archive", + "path_in_archive": "seg_model", + "size": 61388872 + } ] }, "ocrd-anybaseocr-textline": { - "categories": [ - "Layout analysis" - ], - "description": "Finds region polygons for each text line in the input image.", "executable": "ocrd-anybaseocr-textline", "input_file_grp": [ "OCR-D-SEG-TISEG" @@ -508,1069 +795,917 @@ "output_file_grp": [ "OCR-D-SEG-LINE-ANY" ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/line" + ], + "description": "Finds region polygons for each text line in the input image.", "parameters": { - "blackseps": { - "default": false, - "description": "also check for black column separators", - "type": "boolean" - }, - "csminaspect": { - "default": 1.1, - "description": "minimum aspect ratio for column separators", + "minscale": { + "type": "number", "format": "float", - "type": "number" + "default": 12.0, + "description": "minimum scale permitted" }, - "csminheight": { - "default": 6.5, - "description": "minimum column height (units=scale)", + "maxlines": { + "type": "number", "format": "float", - "type": "number" + "default": 300, + "description": "non-standard scaling of horizontal parameters" }, - "expand": { - "default": 3, - "description": "expand mask for grayscale extraction", - "format": "integer", - "type": "number" + "scale": { + "type": "number", + "format": "float", + "default": 0.0, + "description": "the basic scale of the document (roughly, xheight) 0=automatic" }, "hscale": { + "type": "number", + "format": "float", "default": 1.0, - "description": "non-standard scaling of horizontal parameters", + "description": "non-standard scaling of horizontal parameters" + }, + "vscale": { + "type": "number", "format": "float", - "type": "number" + "default": 1.7, + "description": "non-standard scaling of vertical parameters" }, - "libpath": { - "default": ".", - "description": "Library Path for C Executables", - "type": "string" + "threshold": { + "type": "number", + "format": "float", + "default": 0.2, + "description": "baseline threshold" }, - "maxcolseps": { - "default": 2, - "description": "maximum # whitespace column separators", + "noise": { + "type": "number", "format": "integer", - "type": "number" + "default": 8, + "description": "noise threshold for removing small components from lines" }, - "maxlines": { - "default": 300, - "description": "non-standard scaling of horizontal parameters", - "format": "float", - "type": "number" + "usegauss": { + "type": "boolean", + "default": false, + "description": "use gaussian instead of uniform" }, "maxseps": { - "default": 2, - "description": "maximum black column separators", + "type": "number", "format": "integer", - "type": "number" - }, - 
"minscale": { - "default": 12.0, - "description": "minimum scale permitted", - "format": "float", - "type": "number" + "default": 2, + "description": "maximum black column separators" }, - "noise": { - "default": 8, - "description": "noise threshold for removing small components from lines", + "sepwiden": { + "type": "number", "format": "integer", - "type": "number" + "default": 10, + "description": "widen black separators (to account for warping)" }, - "operation_level": { - "default": "region", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region" - ], - "type": "string" - }, - "overwrite": { + "blackseps": { + "type": "boolean", "default": false, - "description": "check whether to overwrite existing text lines", - "type": "boolean" + "description": "also check for black column separators" + }, + "maxcolseps": { + "type": "number", + "format": "integer", + "default": 2, + "description": "maximum # whitespace column separators" + }, + "csminaspect": { + "type": "number", + "format": "float", + "default": 1.1, + "description": "minimum aspect ratio for column separators" + }, + "csminheight": { + "type": "number", + "format": "float", + "default": 6.5, + "description": "minimum column height (units=scale)" }, "pad": { + "type": "number", + "format": "integer", "default": 3, - "description": "padding for extracted lines", + "description": "padding for extracted lines" + }, + "expand": { + "type": "number", "format": "integer", - "type": "number" + "default": 3, + "description": "expand mask for grayscale extraction" }, "parallel": { - "default": 0, - "description": "number of CPUs to use", + "type": "number", "format": "integer", - "type": "number" - }, - "scale": { - "default": 0.0, - "description": "the basic scale of the document (roughly, xheight) 0=automatic", - "format": "float", - "type": "number" + "default": 0, + "description": "number of CPUs to use" }, - "sepwiden": { - "default": 10, - "description": "widen black separators (to account for warping)", - "format": "integer", - "type": "number" + "libpath": { + "type": "string", + "default": ".", + "description": "Library Path for C Executables" }, - "threshold": { - "default": 0.2, - "description": "baseline threshold", - "format": "float", - "type": "number" + "operation_level": { + "type": "string", + "enum": [ + "page", + "region" + ], + "default": "region", + "description": "PAGE XML hierarchy level to operate on" }, - "usegauss": { + "overwrite": { + "type": "boolean", "default": false, - "description": "use gaussian instead of uniform", - "type": "boolean" - }, - "vscale": { - "default": 1.7, - "description": "non-standard scaling of vertical parameters", - "format": "float", - "type": "number" + "description": "check whether to overwrite existing text lines" } - }, - "steps": [ - "layout/segmentation/line" - ] + } }, - "ocrd-anybaseocr-tiseg": { - "categories": [ - "Layout analysis" - ], - "description": "Separates the text and non-text elements with anyBaseOCR. 
Outputs clipped versions of the input image as AlternativeImage containing either only text or non-text elements.", - "executable": "ocrd-anybaseocr-tiseg", + "ocrd-anybaseocr-layout-analysis": { + "executable": "ocrd-anybaseocr-layout-analysis", "input_file_grp": [ "OCR-D-IMG-CROP" ], "output_file_grp": [ - "OCR-D-SEG-TISEG" + "OCR-D-SEG-LAYOUT" ], - "parameters": { - "seg_weights": { - "cacheable": true, - "content-type": "text/directory", - "default": "seg_model", - "description": "Directory path to deep learning model when use_deeplr is true.", - "format": "uri", - "type": "string" - }, - "use_deeplr": { - "default": true, - "description": "Whether to use deep learning model (UNet pixel classifier) instead of rule-based implementation (multi-resolution morphology).", - "type": "boolean" - } - }, - "steps": [ - "layout/segmentation/text-nontext" - ] - }, - "ocrd-calamari-recognize": { "categories": [ - "Text recognition and optimization" - ], - "description": "Recognize lines with Calamari", - "executable": "ocrd-calamari-recognize", - "input_file_grp": [ - "OCR-D-SEG-LINE" + "Layout analysis" ], - "output_file_grp": [ - "OCR-D-OCR-CALAMARI" + "steps": [ + "layout/analysis" ], + "description": "Generates a table-of-content like document structure of the whole document.", "parameters": { - "checkpoint_dir": { - "cacheable": true, - "content-type": "text/directory", - "default": "qurator-gt4histocr-1.0", - "description": "The directory containing calamari model files (*.ckpt.json). Uses all checkpoints in that directory", - "format": "uri", - "type": "string" - }, - "glyph_conf_cutoff": { - "default": 0.001, - "description": "Only include glyph alternatives with confidences above this threshold", - "format": "float", - "type": "number" + "batch_size": { + "type": "number", + "format": "integer", + "default": 4, + "description": "Batch size for generating test images" }, - "textequiv_level": { - "default": "line", - "description": "Deepest PAGE XML hierarchy level to include TextEquiv results for", - "enum": [ - "line", - "word", - "glyph" - ], - "type": "string" + "model_path": { + "type": "string", + "format": "uri", + "content-type": "text/directory", + "cacheable": true, + "default": "structure_analysis", + "description": "Directory path to layout structure classification model" }, - "voter": { - "default": "confidence_voter_default_ctc", - "description": "The voting algorithm to use", - "type": "string" + "class_mapping_path": { + "type": "string", + "format": "uri", + "content-type": "application/python-pickle", + "cacheable": true, + "default": "mapping_densenet.pickle", + "description": "File path to layout structure classes" } }, - "steps": [ - "recognition/text-recognition" + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/structure_analysis.tar.gz", + "name": "structure_analysis", + "description": "structure analysis model for anybaseocr", + "type": "archive", + "path_in_archive": "structure_analysis", + "size": 29002514 + }, + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/layoutAnalysis/mapping_densenet.pickle", + "name": "mapping_densenet.pickle", + "description": "mapping model for anybaseocr", + "size": 374 + } ] }, - "ocrd-cis-align": { - "categories": [ - "Text recognition and optimization" - ], - "description": "Align multiple OCRs and/or GTs", - "executable": "ocrd-cis-align", + "ocrd-anybaseocr-block-segmentation": { + "executable": "ocrd-anybaseocr-block-segmentation", "input_file_grp": [ - "OCR-D-OCR-1", - "OCR-D-OCR-2", - "OCR-D-OCR-N" + "OCR-D-IMG" 
], "output_file_grp": [ - "OCR-D-ALIGNED" + "OCR-D-SEG-BLOCK" ], - "steps": [ - "recognition/post-correction" - ] - }, - "ocrd-cis-ocropy-binarize": { "categories": [ - "Image preprocessing" - ], - "description": "Binarize (and optionally deskew/despeckle) pages / regions / lines with ocropy", - "executable": "ocrd-cis-ocropy-binarize", - "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" + "Layout analysis" ], - "output_file_grp": [ - "OCR-D-IMG-BIN", - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" + "steps": [ + "layout/segmentation/region" ], + "description": "Segments and classifies regions in each single page and annotates the the region polygons and classes.", "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", - "format": "float", - "type": "number" + "block_segmentation_weights": { + "type": "string", + "format": "uri", + "content-type": "application/x-hdf;subtype=bag", + "cacheable": true, + "default": "block_segmentation_weights.h5", + "description": "Path to model weights" }, - "grayscale": { + "overwrite": { + "type": "boolean", "default": false, - "description": "for the 'ocropy' method, produce grayscale-normalized instead of thresholded image", - "type": "boolean" - }, - "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level granularity to annotate images for", - "enum": [ - "page", - "table", - "region", - "line" - ], - "type": "string" - }, - "maxskew": { - "default": 0.0, - "description": "modulus of maximum skewing angle (in degrees) to detect (larger will be slower, 0 will deactivate deskewing)", - "format": "float", - "type": "number" + "description": "whether to delete existing text lines prior to segmentation" }, - "method": { - "default": "ocropy", - "description": "binarization method to use (only 'ocropy' will include deskewing and denoising)", - "enum": [ - "none", - "global", - "otsu", - "gauss-otsu", - "ocropy" - ], - "type": "string" - }, - "noise_maxsize": { - "default": 0, - "description": "maximum pixel number for connected components to regard as noise (0 will deactivate denoising)", - "format": "int", - "type": "number" - }, - "threshold": { - "default": 0.5, - "description": "for the 'ocropy' and ' global' method, black/white threshold to apply on the whitelevel normalized image (the larger the more/heavier foreground)", - "format": "float", - "type": "number" - } - }, - "steps": [ - "preprocessing/optimization/binarization", - "preprocessing/optimization/grayscale_normalization", - "preprocessing/optimization/deskewing" - ] - }, - "ocrd-cis-ocropy-clip": { - "categories": [ - "Layout analysis" - ], - "description": "Clip text regions / lines at intersections with neighbours", - "executable": "ocrd-cis-ocropy-clip", - "input_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", - "format": "float", - "type": "number" + "th": { + "type": "number", + "format": "integer", + "default": 15, + "description": "num of pixels to include in the area region (when applying text/non-text mask from tiseg)" }, - "level-of-operation": { - "default": "region", - "description": "PAGE XML hierarchy level granularity to annotate images for", - "enum": [ - "region", - "line" + 
"active_classes": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "page-number", + "paragraph", + "catch-word", + "heading", + "drop-capital", + "signature-mark", + "header", + "marginalia", + "footnote", + "footnote-continued", + "caption", + "endnote", + "footer", + "keynote", + "image", + "table", + "graphics" + ] + }, + "default": [ + "page-number", + "paragraph", + "catch-word", + "heading", + "drop-capital", + "signature-mark", + "marginalia", + "caption" ], - "type": "string" + "description": "Restrict types of regions to be detected." }, - "min_fraction": { - "default": 0.7, - "description": "share of foreground pixels that must be retained by the largest label", - "format": "float", - "type": "number" - } - }, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ] - }, - "ocrd-cis-ocropy-denoise": { - "categories": [ - "Image preprocessing" - ], - "description": "Despeckle pages / regions / lines with ocropy", - "executable": "ocrd-cis-ocropy-denoise", - "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-IMG-DESPECK", - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", - "format": "float", - "type": "number" + "post_process": { + "type": "boolean", + "default": true, + "description": "whether to apply non-maximum suppression (across classes) on the detections" }, - "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level granularity to annotate images for", - "enum": [ - "page", - "region", - "line" - ], - "type": "string" + "use_masks": { + "type": "boolean", + "default": true, + "description": "whether to segment from the mask as polygon instead of just the bbox" }, - "noise_maxsize": { - "default": 3.0, - "description": "maximum size in points (pt) for connected components to regard as noise (0 will deactivate denoising)", + "min_confidence": { + "type": "number", "format": "float", - "type": "number" - } - }, - "steps": [ - "preprocessing/optimization/despeckling" - ] - }, - "ocrd-cis-ocropy-deskew": { - "categories": [ - "Image preprocessing" - ], - "description": "Deskew regions with ocropy (by annotating orientation angle and adding AlternativeImage)", - "executable": "ocrd-cis-ocropy-deskew", - "input_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" - ], - "parameters": { - "level-of-operation": { - "default": "region", - "description": "PAGE XML hierarchy level granularity to annotate images for", - "enum": [ - "page", - "table", - "region" - ], - "type": "string" + "default": 0.9, + "description": "Confidence threshold for region detections" }, - "maxskew": { - "default": 5.0, - "description": "modulus of maximum skewing angle to detect (larger will be slower, 0 will deactivate deskewing)", - "type": "number" - } - }, - "steps": [ - "preprocessing/optimization/deskewing" - ] - }, - "ocrd-cis-ocropy-dewarp": { - "categories": [ - "Image preprocessing" - ], - "description": "Dewarp line images with ocropy", - "executable": "ocrd-cis-ocropy-dewarp", - "input_file_grp": [ - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-SEG-LINE" - ], - "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + 
"min_share_drop": { + "type": "number", "format": "float", - "type": "number" + "default": 0.9, + "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to suppress smaller prediction" }, - "max_neighbour": { - "default": 0.05, - "description": "maximum rate of foreground pixels intruding from neighbouring lines (line will not be processed above that)", + "min_share_merge": { + "type": "number", "format": "float", - "type": "number" + "default": 0.8, + "description": "Minimum required overlap (intersection over single) of mask-derived contour area between neighbours to merge smaller prediction" }, - "range": { - "default": 4.0, - "description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels); also the mean vertical padding", + "min_iou_drop": { + "type": "number", "format": "float", - "type": "number" + "default": 0.8, + "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to suppress prediction scoring worse" }, - "smoothness": { - "default": 1.0, - "description": "kernel size (relative to image height) of horizontal blur applied to foreground to find the center line; the smaller the more dynamic (0.1 would be a better default)", + "min_iou_merge": { + "type": "number", "format": "float", - "type": "number" - } - }, - "steps": [ - "preprocessing/optimization/dewarping" - ] - }, - "ocrd-cis-ocropy-rec": { - "categories": [ - "Text recognition and optimization" - ], - "description": "Recognize text snippets", - "executable": "ocrd-cis-ocropy-rec", - "input_file_grp": [ - "OCR-D-GT-SEG-BLOCK", - "OCR-D-SEG-BLOCK" - ], - "parameters": { - "model": { - "description": "ocropy model to apply (e.g. fraktur.pyrnn)", - "type": "string" + "default": 0.2, + "description": "Minimum required overlap (intersection over union) of mask-derived contour area between neighbours to merge prediction scoring worse" } }, - "steps": [ - "recognition/text-recognition" + "resources": [ + { + "url": "https://s3.gwdg.de/ocr-d/models/dfki/segmentation/block_segmentation_weights.h5", + "name": "block_segmentation_weights.h5", + "description": "block segmentation model for anybaseocr", + "size": 256139800 + } ] }, - "ocrd-cis-ocropy-recognize": { + "ocrd-calamari-recognize": { + "executable": "ocrd-calamari-recognize", "categories": [ "Text recognition and optimization" ], - "description": "Recognize text in (binarized+deskewed+dewarped) lines with ocropy", - "executable": "ocrd-cis-ocropy-recognize", + "steps": [ + "recognition/text-recognition" + ], + "description": "Recognize lines with Calamari", "input_file_grp": [ - "OCR-D-SEG-LINE", - "OCR-D-SEG-WORD", - "OCR-D-SEG-GLYPH" + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-OCR-OCRO" + "OCR-D-OCR-CALAMARI" ], "parameters": { - "model": { - "description": "ocropy model to apply (e.g. fraktur.pyrnn)", - "type": "string" + "checkpoint_dir": { + "description": "The directory containing calamari model files (*.ckpt.json). 
Uses all checkpoints in that directory", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "cacheable": true, + "default": "qurator-gt4histocr-1.0" + }, + "voter": { + "description": "The voting algorithm to use", + "type": "string", + "default": "confidence_voter_default_ctc" }, "textequiv_level": { - "default": "line", - "description": "PAGE XML hierarchy level granularity to add the TextEquiv results to", + "type": "string", "enum": [ "line", "word", "glyph" ], - "type": "string" + "default": "line", + "description": "Deepest PAGE XML hierarchy level to include TextEquiv results for" + }, + "glyph_conf_cutoff": { + "type": "number", + "format": "float", + "default": 0.001, + "description": "Only include glyph alternatives with confidences above this threshold" } - }, - "steps": [ - "recognition/text-recognition" - ] + } }, - "ocrd-cis-ocropy-resegment": { + "ocrd-cis-ocropy-binarize": { + "executable": "ocrd-cis-ocropy-binarize", "categories": [ - "Layout analysis" + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization", + "preprocessing/optimization/grayscale_normalization", + "preprocessing/optimization/deskewing" ], - "description": "Resegment text lines", - "executable": "ocrd-cis-ocropy-resegment", "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", "OCR-D-SEG-LINE" ], "output_file_grp": [ + "OCR-D-IMG-BIN", + "OCR-D-SEG-BLOCK", "OCR-D-SEG-LINE" ], + "description": "Binarize (and optionally deskew/despeckle) pages / regions / lines with ocropy", "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "method": { + "type": "string", + "enum": [ + "none", + "global", + "otsu", + "gauss-otsu", + "ocropy" + ], + "description": "binarization method to use (only 'ocropy' will include deskewing and denoising)", + "default": "ocropy" + }, + "threshold": { + "type": "number", "format": "float", - "type": "number" + "description": "for the 'ocropy' and ' global' method, black/white threshold to apply on the whitelevel normalized image (the larger the more/heavier foreground)", + "default": 0.5 }, - "extend_margins": { - "default": 3, - "description": "number of pixels to extend the input polygons in all directions", - "format": "integer", - "type": "number" + "grayscale": { + "type": "boolean", + "description": "for the 'ocropy' method, produce grayscale-normalized instead of thresholded image", + "default": false + }, + "maxskew": { + "type": "number", + "format": "float", + "description": "modulus of maximum skewing angle (in degrees) to detect (larger will be slower, 0 will deactivate deskewing)", + "default": 0.0 + }, + "noise_maxsize": { + "type": "number", + "format": "int", + "description": "maximum pixel number for connected components to regard as noise (0 will deactivate denoising)", + "default": 0 + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 }, "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to segment textlines in ('region' abides by existing text region boundaries, 'page' optimises lines in the whole page once", + "type": "string", "enum": [ "page", - "region" - ], - "type": "string" - }, - "method": { - "default": "lineest", - "description": "source for new line polygon candidates ('lineest' for line estimation, i.e. 
how Ocropy would have segmented text lines; 'baseline' tries to re-polygonize from the baseline annotation; 'ccomps' avoids crossing connected components by majority rule)", - "enum": [ - "lineest", - "baseline", - "ccomps" + "table", + "region", + "line" ], - "type": "string" - }, - "min_fraction": { - "default": 0.75, - "description": "share of foreground pixels that must be retained by the output polygons", - "format": "float", - "type": "number" + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "page" } - }, - "steps": [ - "layout/segmentation/line" - ] + } }, - "ocrd-cis-ocropy-segment": { + "ocrd-cis-ocropy-deskew": { + "executable": "ocrd-cis-ocropy-deskew", "categories": [ - "Layout analysis" + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/deskewing" ], - "description": "Segment pages into regions and lines, tables into cells and lines, or regions into lines with ocropy", - "executable": "ocrd-cis-ocropy-segment", "input_file_grp": [ - "OCR-D-GT-SEG-BLOCK", - "OCR-D-SEG-BLOCK" + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" ], "output_file_grp": [ + "OCR-D-SEG-BLOCK", "OCR-D-SEG-LINE" ], + "description": "Deskew regions with ocropy (by annotating orientation angle and adding AlternativeImage)", "parameters": { - "csminheight": { - "default": 4, - "description": "(when operating on the page/table level) minimum height of white/background or black/foreground column separators in multiples of scale/capheight, counted piece-wise", - "format": "integer", - "type": "number" - }, - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative; when disabled and no meta-data is found, 300 is assumed", - "format": "float", - "type": "number" - }, - "gap_height": { - "default": 0.01, - "description": "(when operating on the page/table level) largest minimum pixel average in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be larger when more foreground noise is present, reduce to avoid mistaking text for noise", - "format": "float", - "type": "number" - }, - "gap_width": { - "default": 1.5, - "description": "(when operating on the page/table level) smallest width in multiples of scale/capheight of a valley in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be smaller when more foreground noise is present, increase to avoid mistaking inter-line as paragraph gaps and inter-word as inter-column gaps", - "format": "float", - "type": "number" - }, - "hlminwidth": { - "default": 10, - "description": "(when operating on the page/table level) minimum width of black/foreground horizontal separators in multiples of scale/capheight, counted piece-wise", - "format": "integer", - "type": "number" + "maxskew": { + "type": "number", + "description": "modulus of maximum skewing angle to detect (larger will be slower, 0 will deactivate deskewing)", + "default": 5.0 }, "level-of-operation": { - "default": "region", - "description": "PAGE XML hierarchy level to read images from and add elements to", + "type": "string", "enum": [ "page", "table", "region" ], - "type": "string" - }, - "maxcolseps": { - "default": 20, - "description": "(when operating on the page/table level) maximum number of white/background column separators to detect, counted piece-wise", - 
"format": "integer", - "type": "number" - }, - "maximages": { - "default": 10, - "description": "(when operating on the page level) maximum number of black/foreground very large components to detect (and suppress), counted piece-wise", - "format": "integer", - "type": "number" - }, - "maxseps": { - "default": 20, - "description": "(when operating on the page/table level) number of black/foreground column separators to detect (and suppress), counted piece-wise", - "format": "integer", - "type": "number" - }, - "overwrite_lines": { - "default": true, - "description": "(when operating on the region level) remove any existing TextLine elements; otherwise append", - "type": "boolean" - }, - "overwrite_order": { - "default": true, - "description": "(when operating on the page/table level) remove any references for existing TextRegion elements within the top (page/table) reading order; otherwise append", - "type": "boolean" - }, - "overwrite_regions": { - "default": true, - "description": "(when operating on the page/table level) remove any existing TextRegion elements; otherwise append", - "type": "boolean" - }, - "overwrite_separators": { - "default": true, - "description": "(when operating on the page/table level) remove any existing SeparatorRegion elements; otherwise append", - "type": "boolean" - }, - "spread": { - "default": 2.4, - "description": "distance in points (pt) from the foreground to project text line (or text region) labels into the background for polygonal contours; if zero, project half a scale/capheight", - "format": "float", - "type": "number" + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "region" } - }, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ] + } }, - "ocrd-cis-ocropy-train": { + "ocrd-cis-ocropy-denoise": { + "executable": "ocrd-cis-ocropy-denoise", "categories": [ - "Text recognition and optimization" + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/despeckling" ], - "description": "train model with ground truth from mets data", - "executable": "ocrd-cis-ocropy-train", "input_file_grp": [ - "OCR-D-GT-SEG-BLOCK", - "OCR-D-SEG-BLOCK" + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-IMG-DESPECK", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" ], + "description": "Despeckle pages / regions / lines with ocropy", "parameters": { - "model": { - "description": "load model or crate new one (e.g. 
fraktur.pyrnn)", - "type": "string" - }, - "ntrain": { - "default": 1000000, - "description": "lines to train before stopping", - "format": "integer", - "type": "number" + "noise_maxsize": { + "type": "number", + "format": "float", + "description": "maximum size in points (pt) for connected components to regard as noise (0 will deactivate denoising)", + "default": 3.0 }, - "outputpath": { - "description": "(existing) path for the trained model", - "type": "string" + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 }, - "textequiv_level": { - "default": "line", - "description": "PAGE XML hierarchy level granularity", + "level-of-operation": { + "type": "string", "enum": [ - "line", - "word", - "glyph" + "page", + "region", + "line" ], - "type": "string" + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "page" } - }, - "steps": [ - "recognition/text-recognition" - ] + } }, - "ocrd-cis-postcorrect": { + "ocrd-cis-ocropy-clip": { + "executable": "ocrd-cis-ocropy-clip", "categories": [ - "Text recognition and optimization" + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" ], - "description": "Post correct OCR results", - "executable": "ocrd-cis-postcorrect", "input_file_grp": [ - "OCR-D-LINE-ALIGNED" + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-POST-CORRECTED" + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" ], + "description": "Clip text regions / lines at intersections with neighbours", "parameters": { - "maxCandidates": { - "default": 10, - "description": "Maximum number of considered correction candidates per suspicious token", - "format": "integer", - "type": "number" - }, - "model": { - "description": "Path to the post correction model file", - "required": true, - "type": "string" - }, - "nOCR": { - "default": 1, - "description": "Number of parallel OCR's to use for the post correction", - "format": "integer", - "type": "number" - }, - "profilerConfig": { - "description": "Path to the profiler's language config file", - "required": true, - "type": "string" + "level-of-operation": { + "type": "string", + "enum": [ + "region", + "line" + ], + "description": "PAGE XML hierarchy level granularity to annotate images for", + "default": "region" }, - "profilerPath": { - "description": "Path to the profiler executable", - "required": true, - "type": "string" + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 }, - "runLE": { - "default": false, - "description": "Do run the lexicon extension step for the post correction", - "type": "boolean" + "min_fraction": { + "type": "number", + "format": "float", + "description": "share of foreground pixels that must be retained by the largest label", + "default": 0.7 } - }, - "steps": [ - "recognition/post-correction" - ] + } }, - "ocrd-cor-asv-ann-align": { + "ocrd-cis-ocropy-resegment": { + "executable": "ocrd-cis-ocropy-resegment", "categories": [ - "Text recognition and optimization" + "Layout analysis" + ], + "steps": [ + "layout/segmentation/line" ], - "description": "Align different textline annotations and pick best", - "executable": "ocrd-cor-asv-ann-align", "input_file_grp": [ - "OCR-D-GT-SEG-LINE", - "OCR-D-OCR-TESS", - "OCR-D-OCR-KRAK", - "OCR-D-OCR-OCRO", - 
"OCR-D-OCR-CALA", - "OCR-D-OCR-ANY", - "OCR-D-COR-ASV" + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-OCR-MULTI" + "OCR-D-SEG-LINE" ], + "description": "Improve coordinates of text lines", "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region" + ], + "description": "PAGE XML hierarchy level to segment textlines in ('region' abides by existing text region boundaries, 'page' optimises lines in the whole page once", + "default": "page" + }, "method": { - "default": "majority", - "description": "decide by majority of OCR hypotheses, by highest confidence of OCRs or by a combination thereof", + "type": "string", "enum": [ - "majority", - "confidence", - "combined" + "lineest", + "baseline", + "ccomps" ], - "type": "string" + "description": "source for new line polygon candidates ('lineest' for line estimation, i.e. how Ocropy would have segmented text lines; 'baseline' tries to re-polygonize from the baseline annotation; 'ccomps' avoids crossing connected components by majority rule)", + "default": "lineest" + }, + "baseline_only": { + "type": "boolean", + "description": "ignore existing textline coords completely and use baseline as input if possible", + "default": false + }, + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 + }, + "min_fraction": { + "type": "number", + "format": "float", + "description": "share of foreground pixels that must be retained by the output polygons", + "default": 0.75 + }, + "extend_margins": { + "type": "number", + "format": "integer", + "description": "number of pixels to extend the input polygons in all directions", + "default": 3 } - }, - "steps": [ - "recognition/post-correction" - ] + } }, - "ocrd-cor-asv-ann-evaluate": { + "ocrd-cis-ocropy-dewarp": { + "executable": "ocrd-cis-ocropy-dewarp", "categories": [ - "Text recognition and optimization" + "Image preprocessing" ], - "description": "Align different textline annotations and compute distance", - "executable": "ocrd-cor-asv-ann-evaluate", + "steps": [ + "preprocessing/optimization/dewarping" + ], + "description": "Dewarp line images with ocropy", "input_file_grp": [ - "OCR-D-GT-SEG-LINE", - "OCR-D-OCR-TESS", - "OCR-D-OCR-KRAK", - "OCR-D-OCR-OCRO", - "OCR-D-OCR-CALA", - "OCR-D-OCR-ANY", - "OCR-D-COR-ASV" + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-EVAL-CER" + "OCR-D-SEG-LINE" ], "parameters": { - "confusion": { - "default": 0, - "description": "Count edits and show that number of most frequent confusions (non-identity) in the end.", - "format": "integer", - "minimum": 0, - "type": "number" + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative", + "default": 0 }, - "gt_level": { - "default": 1, - "description": "When `metric=historic_latin`, normalize and equate at this GT transcription level.", - "enum": [ - 1, - 2, - 3 - ], - "type": "number" + "range": { + "type": "number", + "format": "float", + "description": "maximum vertical disposition or maximum margin (will be multiplied by mean centerline deltas to yield pixels); also the mean vertical padding", + "default": 4.0 }, - "histogram": { - "default": false, - "description": "Aggregate and show mutual character histograms.", - "type": "boolean" + "smoothness": { + "type": "number", + "format": "float", + "description": "kernel size (relative to 
image height) of horizontal blur applied to foreground to find the center line; the smaller the more dynamic (0.1 would be a better default)", + "default": 1.0 }, - "metric": { - "default": "Levenshtein-fast", - "description": "Distance metric to calculate and aggregate: `historic_latin` for GT level 1-3, `NFKC` for roughly GT level 2 (but including reduction of `\u017f/s` and superscript numerals etc), `Levenshtein` for GT level 3 (or `Levenshtein-fast` for faster alignment - but using maximum sequence length instead of path length as CER denominator, and without confusion statistics).", - "enum": [ - "Levenshtein-fast", - "Levenshtein", - "NFC", - "NFKC", - "historic_latin" - ], - "type": "string" + "max_neighbour": { + "type": "number", + "format": "float", + "description": "maximum rate of foreground pixels intruding from neighbouring lines (line will not be processed above that)", + "default": 0.05 } - }, - "steps": [ - "recognition/evaluation" - ] + } }, - "ocrd-cor-asv-ann-join": { + "ocrd-cis-ocropy-recognize": { + "executable": "ocrd-cis-ocropy-recognize", "categories": [ "Text recognition and optimization" ], - "description": "Join different textline annotations by concatenation", - "executable": "ocrd-cor-asv-ann-join", + "steps": [ + "recognition/text-recognition" + ], + "description": "Recognize text in (binarized+deskewed+dewarped) lines with ocropy", "input_file_grp": [ - "OCR-D-GT-SEG-LINE", - "OCR-D-OCR-TESS", - "OCR-D-OCR-KRAK", - "OCR-D-OCR-OCRO", - "OCR-D-OCR-CALA", - "OCR-D-OCR-ANY", - "OCR-D-COR-ASV" + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD", + "OCR-D-SEG-GLYPH" ], "output_file_grp": [ - "OCR-D-OCR-MULTI" + "OCR-D-OCR-OCRO" ], "parameters": { - "add-filegrp-comments": { - "default": false, - "description": "set @comments of each TextEquiv to the fileGrp it came from", - "type": "boolean" + "textequiv_level": { + "type": "string", + "enum": [ + "line", + "word", + "glyph" + ], + "description": "PAGE XML hierarchy level granularity to add the TextEquiv results to", + "default": "line" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/gzip", + "description": "ocropy model to apply (e.g. fraktur.pyrnn.gz)" } - }, - "steps": [ - "recognition/post-correction" - ] + } }, - "ocrd-cor-asv-ann-mark": { + "ocrd-cis-ocropy-segment": { + "executable": "ocrd-cis-ocropy-segment", "categories": [ - "Text recognition and optimization" + "Layout analysis" ], - "description": "mark words not found by a spellchecker", - "executable": "ocrd-cor-asv-ann-mark", + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" + ], + "input_file_grp": [ + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-LINE" + ], + "description": "Segment pages into regions and lines, tables into cells and lines, or regions into lines with ocropy", "parameters": { - "command": { - "description": "external tool to query word forms, e.g. 
'hunspell -i utf-8 -d de_DE,en_US -w'", - "required": true, - "type": "string" + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero or negative; when disabled and no meta-data is found, 300 is assumed", + "default": 0 + }, + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "table", + "region" + ], + "description": "PAGE XML hierarchy level to read images from and add elements to", + "default": "region" + }, + "maxcolseps": { + "type": "number", + "format": "integer", + "default": 20, + "description": "(when operating on the page/table level) maximum number of white/background column separators to detect, counted piece-wise" + }, + "maxseps": { + "type": "number", + "format": "integer", + "default": 20, + "description": "(when operating on the page/table level) number of black/foreground column separators to detect (and suppress), counted piece-wise" + }, + "maximages": { + "type": "number", + "format": "integer", + "default": 10, + "description": "(when operating on the page level) maximum number of black/foreground very large components to detect (and suppress), counted piece-wise" + }, + "csminheight": { + "type": "number", + "format": "integer", + "default": 4, + "description": "(when operating on the page/table level) minimum height of white/background or black/foreground column separators in multiples of scale/capheight, counted piece-wise" + }, + "hlminwidth": { + "type": "number", + "format": "integer", + "default": 10, + "description": "(when operating on the page/table level) minimum width of black/foreground horizontal separators in multiples of scale/capheight, counted piece-wise" + }, + "gap_height": { + "type": "number", + "format": "float", + "default": 0.01, + "description": "(when operating on the page/table level) largest minimum pixel average in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be larger when more foreground noise is present, reduce to avoid mistaking text for noise" + }, + "gap_width": { + "type": "number", + "format": "float", + "default": 1.5, + "description": "(when operating on the page/table level) smallest width in multiples of scale/capheight of a valley in the horizontal or vertical profiles (across the binarized image) to still be regarded as a gap during recursive X-Y cut from lines to regions; needs to be smaller when more foreground noise is present, increase to avoid mistaking inter-line as paragraph gaps and inter-word as inter-column gaps" }, - "format": { - "default": "conf", - "description": "how unknown words should be marked; if 'conf', then writes @conf=0.123, otherwise writes that value into @comments", - "type": "string" + "overwrite_order": { + "type": "boolean", + "default": true, + "description": "(when operating on the page/table level) remove any references for existing TextRegion elements within the top (page/table) reading order; otherwise append" }, - "normalization": { - "default": {}, - "description": "mapping of characters prior to spellcheck, e.g. 
{'\u017f': 's', 'a\u0364': '\u00e4'}", - "type": "object" + "overwrite_separators": { + "type": "boolean", + "default": true, + "description": "(when operating on the page/table level) remove any existing SeparatorRegion elements; otherwise append" + }, + "overwrite_regions": { + "type": "boolean", + "default": true, + "description": "(when operating on the page/table level) remove any existing TextRegion elements; otherwise append" + }, + "overwrite_lines": { + "type": "boolean", + "default": true, + "description": "(when operating on the region level) remove any existing TextLine elements; otherwise append" + }, + "spread": { + "type": "number", + "format": "float", + "default": 2.4, + "description": "distance in points (pt) from the foreground to project text line (or text region) labels into the background for polygonal contours; if zero, project half a scale/capheight" } - }, - "steps": [ - "recognition/post-correction" - ] + } }, - "ocrd-cor-asv-ann-process": { + "ocrd-cis-ocropy-train": { + "executable": "ocrd-cis-ocropy-train", "categories": [ "Text recognition and optimization" ], - "description": "Improve text annotation by character-level encoder-attention-decoder ANN model", - "executable": "ocrd-cor-asv-ann-process", - "input_file_grp": [ - "OCR-D-OCR-TESS", - "OCR-D-OCR-KRAK", - "OCR-D-OCR-OCRO", - "OCR-D-OCR-CALA", - "OCR-D-OCR-ANY" + "steps": [ + "recognition/text-recognition" ], - "output_file_grp": [ - "OCR-D-COR-ASV" + "input_file_grp": [ + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" ], + "description": "train model with ground truth from mets data", "parameters": { - "charmap": { - "default": {}, - "description": "mapping for input characters before passing to correction; can be used to adapt to character set mismatch between input and model (without relying on underspecification alone)", - "type": "object" - }, - "fast_mode": { - "default": false, - "description": "decode greedy instead of beamed, with batches of parallel lines instead of parallel alternatives; also disables rejection and beam parameters; enable if performance is far more important than quality", - "type": "boolean" - }, - "fixed_beam_width": { - "default": 15, - "description": "maximum number of candidates allowed to enter the beam in each hypothesis; controls the quality/performance trade-off", - "format": "integer", - "type": "number" - }, - "model_file": { - "cacheable": true, - "content-type": "application/x-hdf;subtype=bag", - "description": "path of h5py weight/config file for model trained with cor-asv-ann-train", - "format": "uri", - "required": true, - "type": "string" - }, - "rejection_threshold": { - "default": 0.5, - "description": "minimum probability of the candidate corresponding to the input character in each hypothesis during beam search, helps balance precision/recall trade-off; set to 0 to disable rejection (max recall) or 1 to disable correction (max precision)", - "format": "float", - "type": "number" - }, - "relative_beam_width": { - "default": 0.2, - "description": "minimum fraction of the best candidate's probability required to enter the beam in each hypothesis; controls the quality/performance trade-off", - "format": "float", - "type": "number" - }, "textequiv_level": { - "default": "glyph", - "description": "PAGE XML hierarchy level to read/write TextEquiv input/output on", + "type": "string", + "description": "hierarchy level to extract GT pairs from", "enum": [ "line", "word", "glyph" ], - "type": "string" + "default": "line" + }, + "model": { + "type": "string", + "format": 
"uri", + "content-type": "application/gzip", + "description": "load model (e.g. 'fraktur.pyrnn.gz') to init weights, or none to train from scratch" + }, + "ntrain": { + "type": "number", + "format": "integer", + "description": "lines to train before stopping", + "default": 1000000 + }, + "outputpath": { + "type": "string", + "description": "(existing) path for the trained model" } - }, + } + }, + "ocrd-cis-align": { + "executable": "ocrd-cis-align", + "categories": [ + "Text recognition and optimization" + ], "steps": [ "recognition/post-correction" - ] + ], + "input_file_grp": [ + "OCR-D-OCR-1", + "OCR-D-OCR-2", + "OCR-D-OCR-N" + ], + "output_file_grp": [ + "OCR-D-ALIGNED" + ], + "description": "Align multiple OCRs and/or GTs" }, - "ocrd-cor-asv-fst-process": { + "ocrd-cis-postcorrect": { + "executable": "ocrd-cis-postcorrect", "categories": [ "Text recognition and optimization" ], - "description": "Improve text annotation by FST error and lexicon model with character-level LSTM language model", - "executable": "ocrd-cor-asv-fst-process", + "steps": [ + "recognition/post-correction" + ], + "description": "Post correct OCR results", "input_file_grp": [ - "OCR-D-OCR-TESS", - "OCR-D-OCR-KRAK", - "OCR-D-OCR-OCRO", - "OCR-D-OCR-CALA", - "OCR-D-OCR-ANY" + "OCR-D-LINE-ALIGNED" ], "output_file_grp": [ - "OCR-D-COR-ASV" + "OCR-D-POST-CORRECTED" ], "parameters": { - "beam_width": { - "default": 100, - "description": "maximum number of best partial paths to consider during beam search in language modelling", + "maxCandidates": { + "description": "Maximum number of considered correction candidates per suspicious token", + "type": "number", "format": "integer", - "type": "number" - }, - "errorfst_file": { - "cacheable": true, - "content-type": "application/vnd.openfst", - "description": "path of FST file for error model", - "format": "uri", - "required": true, - "type": "string" + "default": 10 }, - "keraslm_file": { - "cacheable": true, - "content-type": "application/x-hdf;subtype=bag", - "description": "path of h5py weight/config file for language model trained with keraslm", - "format": "uri", + "profilerPath": { + "description": "Path to the profiler executable", "required": true, "type": "string" }, - "lexiconfst_file": { - "cacheable": true, - "content-type": "application/vnd.openfst", - "description": "path of FST file for lexicon model", - "format": "uri", + "profilerConfig": { + "description": "Path to the profiler's language config file", "required": true, "type": "string" }, - "lm_weight": { - "default": 0.5, - "description": "share of the LM scores over the FST output confidences", - "format": "float", - "type": "number" - }, - "pruning_weight": { - "default": 5.0, - "description": "transition weight for pruning the hypotheses in each word window FST", - "format": "float", - "type": "number" + "model": { + "description": "Path to the post correction model file", + "type": "string", + "required": true }, - "rejection_weight": { - "default": 1.5, - "description": "transition weight (per character) for unchanged input in each word window FST", - "format": "float", - "type": "number" + "nOCR": { + "description": "Number of parallel OCR's to use for the post correction", + "type": "number", + "format": "integer", + "default": 1 }, - "textequiv_level": { - "default": "word", - "description": "PAGE XML hierarchy level to read TextEquiv input on (output will always be word level)", - "enum": [ - "word" - ], - "type": "string" + "runLE": { + "description": "Do run the lexicon extension step for the 
post correction", + "type": "boolean", + "default": false } - }, - "steps": [ - "recognition/post-correction" - ] + } }, "ocrd-detectron2-segment": { + "executable": "ocrd-detectron2-segment", "categories": [ "Layout analysis" ], + "steps": [ + "layout/segmentation/region" + ], "description": "Detect regions with Detectron2 models", - "executable": "ocrd-detectron2-segment", "input_file_grp": [ "OCR-D-IMG" ], @@ -1578,259 +1713,288 @@ "OCR-D-SEG-REGION" ], "parameters": { + "operation_level": { + "type": "string", + "enum": [ + "page", + "table" + ], + "default": "page", + "description": "hierarchy level which to predict and assign regions for" + }, "categories": { - "description": "maps each region category (position) of the model to a PAGE region type (and @type or @custom if separated by colon), e.g. ['TextRegion:paragraph', 'TextRegion:heading', 'TextRegion:floating', 'TableRegion', 'ImageRegion'] for PubLayNet; categories with an empty string will be skipped during prediction", + "type": "array", "required": true, - "type": "array" - }, - "device": { - "default": "cuda", - "description": "select computing device for Torch (e.g. cpu or cuda:0); will fall back to CPU if no GPU is available", - "type": "string" - }, - "min_confidence": { - "default": 0.5, - "description": "confidence threshold for detections", - "format": "float", - "type": "number" + "description": "maps each region category (position) of the model to a PAGE region type (and @type or @custom if separated by colon), e.g. ['TextRegion:paragraph', 'TextRegion:heading', 'TextRegion:floating', 'TableRegion', 'ImageRegion'] for PubLayNet; categories with an empty string will be skipped during prediction" }, "model_config": { - "content-type": "text/yaml", - "description": "path name of model config", + "type": "string", "format": "uri", + "content-type": "text/yaml", "required": true, - "type": "string" + "description": "path name of model config" }, "model_weights": { - "content-type": "application/octet-stream", - "description": "path name of model weights", + "type": "string", "format": "uri", + "content-type": "application/octet-stream", "required": true, - "type": "string" + "description": "path name of model weights" }, - "operation_level": { - "default": "page", - "description": "hierarchy level which to predict and assign regions for", + "min_confidence": { + "type": "number", + "format": "float", + "default": 0.5, + "description": "confidence threshold for detections" + }, + "postprocessing": { + "type": "string", "enum": [ - "page", - "table" + "full", + "only-nms", + "only-morph", + "none" ], - "type": "string" + "default": "full", + "description": "which postprocessing steps to enable: by default, applies a custom non-maximum suppression (to avoid overlaps) and morphological operations (using connected component analysis on the binarized input image to shrink or expand regions)" + }, + "debug_img": { + "type": "string", + "enum": [ + "none", + "instance_colors", + "instance_colors_only", + "category_colors" + ], + "default": "none", + "description": "paint an AlternativeImage which blends the input image and all raw decoded region candidates" + }, + "device": { + "type": "string", + "default": "cuda", + "description": "select computing device for Torch (e.g. 
cpu or cuda:0); will fall back to CPU if no GPU is available" } }, "resources": [ { - "description": "TableBank via LayoutLM R152-FPN config", + "description": "TableBank via LayoutLM X152-FPN config", "name": "TableBank_X152.yaml", "size": 536, - "url": "https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/All_X152.yaml" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152.yaml" }, { - "description": "TableBank via LayoutLM R152-FPN weights", + "description": "TableBank via LayoutLM X152-FPN weights", "name": "TableBank_X152.pth", "size": 1103832675, - "url": "https://layoutlm.blob.core.windows.net/tablebank/model_zoo/detection/All_X152/model_final.pth" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152.pth" + }, + { + "description": "TableBank via Psarpei X152-FPN config", + "name": "TableBank_X152_Psarpei.yaml", + "size": 534, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152_Psarpei.yaml" + }, + { + "description": "TableBank via Psarpei X152-FPN weights", + "name": "TableBank_X152_Psarpei.pth", + "size": 1103832675, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/TableBank_X152_Psarpei.pth" }, { "description": "PubLayNet via hpanwar08 R50-FPN config", "name": "PubLayNet_R_50_FPN_3x.yaml", "size": 388, - "url": "https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_R_50_FPN_3x.yaml" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x.yaml" }, { "description": "PubLayNet via hpanwar08 R50-FPN weights", "name": "PubLayNet_R_50_FPN_3x.pth", - "path_in_archive": "model_final_trimmed.pth", - "size": 176249992, - "url": "https://www.dropbox.com/sh/44ez171b2qaocd2/AAB0huidzzOXeo99QdplZRjua" + "size": 176249718, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x.pth" }, { "description": "PubLayNet via hpanwar08 R101-FPN config", "name": "PubLayNet_R_101_FPN_3x.yaml", "size": 392, - "url": "https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_R_101_FPN_3x.yaml" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x.yaml" }, { "description": "PubLayNet via hpanwar08 R101-FPN weights", "name": "PubLayNet_R_101_FPN_3x.pth", - "path_in_archive": "model_final.pth", - "size": 254055290, - "url": "https://www.dropbox.com/sh/wgt9skz67usliei/AAD9n6qbsyMz1Y3CwpZpHXCpa" + "size": 503147199, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x.pth" }, { "description": "PubLayNet via hpanwar08 X101-FPN config", "name": "PubLayNet_X_101_32x8d_FPN_3x.yaml", "size": 592, - "url": "https://github.com/hpanwar08/detectron2/raw/master/configs/DLA_mask_rcnn_X_101_32x8d_FPN_3x.yaml" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_X_101_32x8d_FPN_3x.yaml" }, { "description": "PubLayNet via hpanwar08 X101-FPN weights", "name": "PubLayNet_X_101_32x8d_FPN_3x.pth", - "path_in_archive": "model_final_trimmed.pth", - "size": 431414189, - "url": "https://www.dropbox.com/sh/1098ym6vhad4zi6/AABe16eSdY_34KGp52W0ruwha" + "size": 429840864, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_X_101_32x8d_FPN_3x.pth" }, { "description": "PubLayNet via JPLeoRX R50-FPN config", "name": "PubLayNet_R_50_FPN_3x_JPLeoRX.yaml", - "size": 192, - "url": 
"https://github.com/facebookresearch/detectron2/raw/main/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" + "size": 388, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x_JPLeoRX.yaml" }, { "description": "PubLayNet via JPLeoRX R50-FPN weights", "name": "PubLayNet_R_50_FPN_3x_JPLeoRX.pth", - "size": 7692, - "url": "https://keybase.pub/jpleorx/detectron2-publaynet/mask_rcnn_R_50_FPN_3x/model_final.pth" + "size": 176299422, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_50_FPN_3x_JPLeoRX.pth" }, { "description": "PubLayNet via JPLeoRX R101-FPN config", "name": "PubLayNet_R_101_FPN_3x_JPLeoRX.yaml", - "size": 194, - "url": "https://github.com/facebookresearch/detectron2/raw/main/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml" + "size": 392, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x_JPLeoRX.yaml" }, { "description": "PubLayNet via JPLeoRX R101-FPN weights", "name": "PubLayNet_R_101_FPN_3x_JPLeoRX.pth", - "size": 7696, - "url": "https://keybase.pub/jpleorx/detectron2-publaynet/mask_rcnn_R_101_FPN_3x/model_final.pth" + "size": 252572745, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PubLayNet_R_101_FPN_3x_JPLeoRX.pth" + }, + { + "description": "Modern Magazines via Jambo-sudo X101-FPN (pre-trained on PubLayNet, fine-tuned on 500 p. 20th cent. magazines) config", + "name": "Jambo-sudo_X101.yaml", + "size": 592, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Jambo-sudo_X101.yaml" + }, + { + "description": "Modern Magazines via Jambo-sudo X101-FPN (pre-trained on PubLayNet, fine-tuned on 500 p. 20th cent. magazines) weights", + "name": "Jambo-sudo_X101.pth", + "size": 856430002, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Jambo-sudo_X101.pth" + }, + { + "description": "PRImALayout via LayoutLM R50-FPN config", + "name": "PRImALayout_R50.yaml", + "size": 934, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PRImALayout_R50.yaml" + }, + { + "description": "PRImALayout via LayoutLM R50-FPN weights", + "name": "PRImALayout_R50.pth", + "size": 351229486, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/PRImALayout_R50.pth" }, { "description": "DocBank via LayoutLM X101-FPN config", "name": "DocBank_X101.yaml", - "path_in_archive": "X101/X101.yaml", - "size": 526, - "url": "https://layoutlm.blob.core.windows.net/docbank/model_zoo/X101.zip" + "size": 523, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/DocBank_X101.yaml" }, { "description": "DocBank via LayoutLM X101-FPN config", "name": "DocBank_X101.pth", - "path_in_archive": "X101/model.pth", "size": 835606605, - "url": "https://layoutlm.blob.core.windows.net/docbank/model_zoo/X101.zip" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/DocBank_X101.pth" }, { "description": "NewspaperNavigator via LayoutParser R50-PanopticFPN config", "name": "NewspaperNavigator_R_50_PFPN_3x.yaml", - "size": 5434, - "url": "https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml&dl=1" + "size": 330226761, + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/NewspaperNavigator_R_50_PFPN_3x.yaml" }, { "description": "NewspaperNavigator via LayoutParser R50-PanopticFPN weights", "name": "NewspaperNavigator_R_50_PFPN_3x.pth", "size": 330226761, - "url": 
"https://www.dropbox.com/s/6ewh6g8rqt2ev3a/model_final.pth&dl=1" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/NewspaperNavigator_R_50_PFPN_3x.pth" }, { "description": "MathFormulaDetection via LayoutParser R50-FPN config", "name": "Math_R_50_FPN_3x.yaml", "size": 5632, - "url": "https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Math_R_50_FPN_3x.yaml" }, { "description": "MathFormulaDetection via LayoutParser R50-FPN weights", "name": "Math_R_50_FPN_3x.pth", "size": 330084629, - "url": "https://www.dropbox.com/s/7xel0i3iqpm2p8y/model_final.pth?dl=1" + "url": "https://github.com/bertsky/ocrd_detectron2/releases/download/v0.1.7/Math_R_50_FPN_3x.pth" } - ], - "steps": [ - "layout/segmentation/region" ] }, - "ocrd-dinglehopper": { + "ocrd-doxa-binarize": { + "executable": "ocrd-doxa-binarize", "categories": [ - "Quality assurance" - ], - "description": "Evaluate OCR text against ground truth with dinglehopper", - "executable": "ocrd-dinglehopper", - "input_file_grp": [ - "OCR-D-GT-PAGE", - "OCR-D-OCR" - ], - "output_file_grp": [ - "OCR-D-OCR-EVAL" + "Image preprocessing" ], - "parameters": { - "metrics": { - "default": true, - "description": "Enable/disable metrics and green/red", - "type": "boolean" - }, - "textequiv_level": { - "default": "region", - "description": "PAGE XML hierarchy level to extract the text from", - "enum": [ - "region", - "line" - ], - "type": "string" - } - }, "steps": [ - "recognition/text-recognition" - ] - }, - "ocrd-eynollah-segment": { - "categories": [ - "Layout analysis" + "preprocessing/optimization/binarization" ], - "description": "Segment page into regions and lines and do reading order detection with eynollah", - "executable": "ocrd-eynollah-segment", + "description": "binarize via locally adaptive thresholding", "input_file_grp": [ "OCR-D-IMG", "OCR-D-SEG-PAGE", - "OCR-D-GT-SEG-PAGE" + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" ], "output_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", "OCR-D-SEG-LINE" ], "parameters": { - "allow_scaling": { - "default": false, - "description": "check the resolution against the number of detected columns and if needed, scale the image up or down during layout detection (heuristic to improve quality and performance)", - "type": "boolean" - }, - "curved_line": { - "default": false, - "description": "try to return contour of textlines instead of just rectangle bounding box. 
Needs more processing time", - "type": "boolean" - }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); ignored if <= 0 (with fall-back 230)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 }, - "full_layout": { - "default": true, - "description": "Try to detect all element subtypes, including drop-caps and headings", - "type": "boolean" + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" }, - "headers_off": { - "default": false, - "description": "ignore the special role of headings during reading order detection", - "type": "boolean" + "algorithm": { + "type": "string", + "enum": [ + "Otsu", + "Bernsen", + "Niblack", + "Sauvola", + "Wolf", + "Gatos", + "NICK", + "Su", + "Singh", + "Bataineh", + "ISauvola", + "WAN" + ], + "default": "ISauvola", + "description": "Thresholding algorithm to use." }, - "models": { - "cacheable": true, - "content-type": "text/directory", - "description": "Path to directory containing models to be used (See https://qurator-data.de/eynollah)", - "format": "file", - "required": true, - "type": "string" + "parameters": { + "type": "object", + "default": {}, + "description": "Dictionary of algorithm-specific parameters. Unless overridden here, the following defaults are used: \nBernsen:\t{'window': 75, 'threshold': 100, 'contrast-limit': 25}\nNICK:\t{'window': 75, 'k': -0.2}\nNiblack:\t{'window': 75, 'k': 0.2}\nSingh:\t{'window': 75, 'k', 0.2}\nGatos:\t{'glyph': 60}\nSauvola:\t{'window': 75, 'k': 0.2}\nWolf:\t{'window': 75, 'k': 0.2}\nWAN:\t{'window': 75, 'k': 0.2}\nSu:\t{'window': 0 (based on stroke size), 'minN': windowSize (roughly based on size of window)}\n\n(window/glyph sizes are in px, threshold/limits in uint8 [0,255])" } - }, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ] + } }, "ocrd-fileformat-transform": { + "executable": "ocrd-fileformat-transform", + "description": "Convert between OCR file formats", "categories": [ "Image preprocessing" ], - "description": "Convert between OCR file formats", - "executable": "ocrd-fileformat-transform", + "steps": [ + "preprocessing/optimization" + ], "input_file_grp": [ "OCR-D-OCR-PAGE", "OCR-D-OCR-ALTO", @@ -1842,14 +2006,10 @@ "OCR-D-OCR-HOCR" ], "parameters": { - "ext": { - "default": "", - "description": "Output extension. Set to empty string to derive extension from the media type.", - "type": "string" - }, "from-to": { - "default": "page alto", "description": "Transformation scenario, see ocr-fileformat -L", + "type": "string", + "default": "page alto", "enum": [ "abbyy hocr", "abbyy page", @@ -1872,25 +2032,97 @@ "page page2019", "page text", "tei hocr" - ], - "type": "string" + ] + }, + "ext": { + "description": "Output extension. 
Set to empty string to derive extension from the media type.", + "type": "string", + "default": "" }, "script-args": { - "default": "", "description": "Arguments to Saxon (for XSLT transformations) or to transformation script", - "type": "string" + "type": "string", + "default": "" } - }, + } + }, + "ocrd-froc": { + "executable": "ocrd-froc", + "description": "Recognise font family/shape (annotating TextStyle) along with text (annotating TextEquiv)", + "categories": [ + "Text recognition and optimization" + ], "steps": [ - "preprocessing/optimization" - ] + "recognition/font-identification", + "recognition/text-recognition" + ], + "input_file_grp": [ + "OCR-D-SEG" + ], + "output_file_grp": [ + "OCR-D-OCR" + ], + "parameters": { + "method": { + "description": "The method to use for text recognition", + "type": "string", + "enum": [ + "none", + "SelOCR", + "COCR", + "adaptive" + ], + "default": "none" + }, + "network": { + "description": "The file name of the neural network to use, including sufficient path information. Defaults to the model bundled with ocrd_froc.", + "type": "string", + "required": false + }, + "fast_cocr": { + "description": "Whether to use optimization steps on the COCR strategy", + "type": "boolean", + "default": true + }, + "adaptive_treshold": { + "description": "Treshold of certitude needed to use SelOCR when using the adaptive strategy", + "type": "number", + "format": "integer", + "default": 95 + }, + "font_class_priors": { + "description": "List of font classes which are known to be present on the data when using the adaptive/SelOCR strategies. When this option is specified, every font classes not included will be ignored. If 'other' is included in the list, font classification will not be outputted and a generic model will be used for transcriptions.", + "type": "array", + "items": { + "type": "string", + "enum": [ + "antiqua", + "bastarda", + "fraktur", + "textura", + "schwabacher", + "greek", + "italic", + "hebrew", + "gotico-antiqua", + "manuscript", + "rotunda", + "other" + ] + }, + "default": [] + } + } }, "ocrd-im6convert": { + "executable": "ocrd-im6convert", "categories": [ "Image preprocessing" ], + "steps": [ + "preprocessing/optimization" + ], "description": "Convert and transform images", - "executable": "ocrd-im6convert", "input_file_grp": [ "OCR-D-IMG" ], @@ -1899,36 +2131,36 @@ ], "parameters": { "input-options": { - "default": "", + "type": "string", "description": "e.g. -density 600x600 -wavelet-denoise 1%x0.1", - "type": "string" + "default": "" }, "output-format": { + "type": "string", "description": "Desired media type of output", + "required": true, "enum": [ "image/tiff", "image/jp2", "image/png" - ], - "required": true, - "type": "string" + ] }, "output-options": { - "default": "", + "type": "string", "description": "e.g. 
-resample 300x300 -alpha deactivate -normalize -despeckle -noise 2 -negate -morphology close diamond", - "type": "string" + "default": "" } - }, - "steps": [ - "preprocessing/optimization" - ] + } }, "ocrd-keraslm-rate": { + "executable": "ocrd-keraslm-rate", "categories": [ "Text recognition and optimization" ], + "steps": [ + "recognition/text-recognition" + ], "description": "Rate elements of the text with a character-level LSTM language model in Keras", - "executable": "ocrd-keraslm-rate", "input_file_grp": [ "OCR-D-OCR-TESS", "OCR-D-OCR-KRAK", @@ -1942,52 +2174,45 @@ "OCR-D-COR-LM" ], "parameters": { - "alternative_decoding": { - "default": true, - "description": "whether to process all TextEquiv alternatives, finding the best path via beam search, and delete each non-best alternative", - "type": "boolean" - }, - "beam_width": { - "default": 10, - "description": "maximum number of best partial paths to consider during search with alternative_decoding", - "format": "integer", - "type": "number" - }, - "lm_weight": { - "default": 0.5, - "description": "share of the LM scores over the input confidences", - "format": "float", - "type": "number" - }, "model_file": { - "cacheable": true, + "type": "string", + "format": "uri", "content-type": "application/x-hdf;subtype=bag", "description": "path of h5py weight/config file for model trained with keraslm", - "format": "uri", "required": true, - "type": "string" + "cacheable": true }, "textequiv_level": { - "default": "glyph", - "description": "PAGE XML hierarchy level to evaluate TextEquiv sequences on", + "type": "string", "enum": [ "region", "line", "word", "glyph" ], - "type": "string" + "default": "glyph", + "description": "PAGE XML hierarchy level to evaluate TextEquiv sequences on" + }, + "alternative_decoding": { + "type": "boolean", + "description": "whether to process all TextEquiv alternatives, finding the best path via beam search, and delete each non-best alternative", + "default": true + }, + "beam_width": { + "type": "number", + "format": "integer", + "description": "maximum number of best partial paths to consider during search with alternative_decoding", + "default": 10 + }, + "lm_weight": { + "type": "number", + "format": "float", + "description": "share of the LM scores over the input confidences", + "default": 0.5 } - }, - "steps": [ - "recognition/text-recognition" - ] + } }, "ocrd-kraken-binarize": { - "categories": [ - "Image preprocessing" - ], - "description": "Binarize images with kraken", "executable": "ocrd-kraken-binarize", "input_file_grp": [ "OCR-D-IMG", @@ -1998,113 +2223,27 @@ "output_file_grp": [ "OCR-D-PRE-BIN" ], + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "description": "Binarize images with kraken", "parameters": { "level-of-operation": { - "default": "page", "description": "level-of-operation", + "type": "string", + "default": "page", "enum": [ "page", "block", "line" - ], - "type": "string" - } - }, - "steps": [ - "preprocessing/optimization/binarization" - ] - }, - "ocrd-kraken-recognize": { - "categories": [ - "Text recognition and optimization" - ], - "description": "OCR with kraken", - "executable": "ocrd-kraken-recognize", - "input_file_grp": [ - "OCR-D-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-OCR-KRAK" - ], - "parameters": { - "bidi_reordering": { - "default": true, - "description": "Reorder classes in the ocr_record according to the Unicode bidirectional algorithm for correct display.", - "type": "boolean" - }, - "device": { - 
"default": "cpu", - "description": "CUDA ID (e.g. 'cuda:0') for computation on GPU, or 'cpu' to run on CPU only", - "type": "string" - }, - "model": { - "cacheable": true, - "content-type": "application/python-cpickle", - "default": "en_best.mlmodel", - "description": "OCR model to recognize with", - "format": "uri", - "type": "string" - }, - "pad": { - "default": 16, - "description": "Extra blank padding to the left and right of text line.", - "format": "integer", - "type": "number" + ] } - }, - "resources": [ - { - "description": "19th and 20th century German Fraktur; https://github.com/UB-Mannheim/AustrianNewspapers/wiki/Training-with-Kraken", - "name": "austriannewspapers.mlmodel", - "parameter_usage": "without-extension", - "size": 16243476, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/austriannewspapers/20220520/austriannewspapers_best.mlmodel" - }, - { - "description": "19th and 20th century German Fraktur ('Deutscher Reichsanzeiger'); https://github.com/UB-Mannheim/reichsanzeiger-gt/wiki/Training-with-Kraken", - "name": "reichsanzeiger.mlmodel", - "parameter_usage": "without-extension", - "size": 16358636, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/reichsanzeiger-gt/reichsanzeiger_best.mlmodel" - }, - { - "description": "mostly 19th century German Fraktur; https://github.com/UB-Mannheim/digitue-gt/wiki/Training-with-Kraken", - "name": "digitue.mlmodel", - "parameter_usage": "without-extension", - "size": 16364343, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digitue-gt/digitue_best.mlmodel" - }, - { - "description": "16th century German Gothic; https://github.com/UB-Mannheim/digi-gt/wiki/Training", - "name": "luther.mlmodel", - "parameter_usage": "without-extension", - "size": 16305851, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digi-gt/luther_best.mlmodel" - }, - { - "description": "20th century typewriter http://idb.ub.uni-tuebingen.de/opendigi/walz_1976, pretrained on austriannewspapers.mlmodel", - "name": "typewriter.mlmodel", - "parameter_usage": "without-extension", - "size": 16364780, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/typewriter/typewriter.mlmodel" - }, - { - "description": "This model has been trained on a large corpus of modern printed English text augmented with ~10000 lines of historical pages", - "name": "en_best.mlmodel", - "parameter_usage": "without-extension", - "size": 2930723, - "url": "https://zenodo.org/record/2577813/files/en_best.mlmodel?download=1" - } - ], - "steps": [ - "recognition/text-recognition" - ] + } }, "ocrd-kraken-segment": { - "categories": [ - "Layout analysis" - ], - "description": "Block segmentation with kraken", "executable": "ocrd-kraken-segment", "input_file_grp": [ "OCR-D-IMG", @@ -2115,14 +2254,61 @@ "OCR-D-SEG-REGION", "OCR-D-SEG-LINE" ], + "categories": [ + "Layout analysis" + ], + "steps": [ + "layout/segmentation/region" + ], + "description": "Block segmentation with kraken", "parameters": { + "text_direction": { + "type": "string", + "description": "Sets principal text direction", + "enum": [ + "horizontal-lr", + "horizontal-rl", + "vertical-lr", + "vertical-rl" + ], + "default": "horizontal-lr" + }, + "maxcolseps": { + "description": "Maximum number of column separators. 
Set to 0 for single-column text to avoid unnecessary computation.", + "type": "number", + "format": "integer", + "default": 2 + }, + "scale": { + "description": "mean xheight size of glyphs (guessed if zero)", + "type": "number", + "format": "float", + "default": 0 + }, "black_colseps": { - "default": false, "description": "Whether column separators are assumed to be vertical black lines or not", - "type": "boolean" + "type": "boolean", + "default": false + }, + "remove_hlines": { + "description": "Remove horizontal colseps before segmentation", + "type": "boolean", + "default": true + }, + "blla_model": { + "description": "Model used for baseline detection and page segmentation. Ignored if use_legacy.", + "type": "string", + "format": "uri", + "content-type": "application/python-cpickle", + "cacheable": true, + "default": "blla.mlmodel" }, "blla_classes": { + "description": "Class mapping for the region types trained into blla_model.", + "type": "object", + "minProperties": 2, "additionalProperties": { + "type": "string", "enum": [ "TextRegion", "ImageRegion", @@ -2139,179 +2325,237 @@ "NoiseRegion", "UnknownRegion", "CustomRegion" - ], - "type": "string" + ] }, "default": { - "advert": "AdvertRegion", - "chart": "ChartRegion", - "chem": "ChemRegion", - "custom": "CustomRegion", - "graphic": "GraphicRegion", + "text": "TextRegion", "image": "ImageRegion", "line drawing": "LineDrawingRegion", + "graphic": "GraphicRegion", + "table": "TableRegion", + "chart": "ChartRegion", "map": "MapRegion", + "separator": "SeparatorRegion", "maths": "MathsRegion", + "chem": "ChemRegion", "music": "MusicRegion", + "advert": "AdvertRegion", "noise": "NoiseRegion", - "separator": "SeparatorRegion", - "table": "TableRegion", - "text": "TextRegion", - "unknown": "UnknownRegion" - }, - "description": "Class mapping for the region types trained into blla_model.", - "minProperties": 2, - "type": "object" - }, - "blla_model": { - "cacheable": true, - "content-type": "application/python-cpickle", - "default": "blla.mlmodel", - "description": "Model used for baseline detection and page segmentation. Ignored if use_legacy.", - "format": "uri", - "type": "string" + "unknown": "UnknownRegion", + "custom": "CustomRegion" + } }, "device": { - "default": "cpu", - "description": "GPU ID or 'cpu' to run on CPU only", - "type": "string" - }, - "maxcolseps": { - "default": 2, - "description": "Maximum number of column separators. Set to 0 for single-column text to avoid unnecessary computation.", - "format": "integer", - "type": "number" - }, - "remove_hlines": { - "default": true, - "description": "Remove horizontal colseps before segmentation", - "type": "boolean" - }, - "scale": { - "default": 0, - "description": "mean xheight size of glyphs (guessed if zero)", - "format": "float", - "type": "number" - }, - "text_direction": { - "default": "horizontal-lr", - "description": "Sets principal text direction", - "enum": [ - "horizontal-lr", - "horizontal-rl", - "vertical-lr", - "vertical-rl" - ], - "type": "string" + "description": "CUDA ID (e.g. 
'cuda:0') for computation on GPU (if available), or 'cpu' to run on CPU only", + "type": "string", + "default": "cuda:0" }, "use_legacy": { - "default": false, "description": "Use legacy box segmenter as opposed to neural net baseline segmenter", - "type": "boolean" + "type": "boolean", + "default": false } }, "resources": [ { - "description": "Pretrained baseline segmentation model", + "url": "https://github.com/mittagessen/kraken/raw/main/kraken/blla.mlmodel", + "size": 5047020, "name": "blla.mlmodel", "parameter_usage": "without-extension", - "size": 5047020, - "url": "https://github.com/mittagessen/kraken/raw/master/kraken/blla.mlmodel" + "description": "Pretrained baseline segmentation model" } - ], - "steps": [ - "layout/segmentation/region" ] }, - "ocrd-ocropy-segment": { - "categories": [ - "Image preprocessing" - ], - "description": "Segment page", - "executable": "ocrd-ocropy-segment", + "ocrd-kraken-recognize": { + "executable": "ocrd-kraken-recognize", "input_file_grp": [ - "OCR-D-IMG-BIN" + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-SEG-LINE" + "OCR-D-OCR-KRAK" + ], + "categories": [ + "Text recognition and optimization" ], + "steps": [ + "recognition/text-recognition" + ], + "description": "OCR with kraken", "parameters": { - "csminaspect": { - "default": 1.1, - "description": "has an effect", - "type": "number" - }, - "csminheight": { - "default": 10, - "description": "has an effect", - "type": "number" - }, - "expand": { - "default": 3, - "description": "has an effect", - "type": "number" - }, - "hscale": { - "default": 1.0, - "description": "has an effect", - "type": "number" - }, - "maxcolseps": { - "default": 3, - "description": "has an effect", - "type": "number" + "model": { + "description": "OCR model to recognize with", + "type": "string", + "format": "uri", + "content-type": "application/python-cpickle", + "cacheable": true, + "default": "en_best.mlmodel" }, - "maxseps": { - "default": 0, - "description": "has an effect", - "type": "number" + "pad": { + "description": "Extra blank padding to the left and right of text line.", + "type": "number", + "format": "integer", + "default": 16 }, - "noise": { - "default": 8, - "description": "has an effect", - "type": "number" + "bidi_reordering": { + "description": "Reorder classes in the ocr_record according to the Unicode bidirectional algorithm for correct display.", + "type": "boolean", + "default": true }, - "pad": { - "default": 3, - "description": "has an effect", - "type": "number" + "device": { + "description": "CUDA ID (e.g. 
'cuda:0') for computation on GPU (if available), or 'cpu' to run on CPU only", + "type": "string", + "default": "cuda:0" + } + }, + "resources": [ + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/austriannewspapers/20220520/austriannewspapers_best.mlmodel", + "size": 16243476, + "name": "austriannewspapers.mlmodel", + "parameter_usage": "without-extension", + "description": "19th and 20th century German Fraktur; https://github.com/UB-Mannheim/AustrianNewspapers/wiki/Training-with-Kraken" }, - "scale": { - "default": 0.0, - "description": "has an effect", - "type": "number" + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/reichsanzeiger-gt/reichsanzeiger_best.mlmodel", + "size": 16358636, + "name": "reichsanzeiger.mlmodel", + "parameter_usage": "without-extension", + "description": "19th and 20th century German Fraktur ('Deutscher Reichsanzeiger'); https://github.com/UB-Mannheim/reichsanzeiger-gt/wiki/Training-with-Kraken" }, - "sepwiden": { - "default": 10, - "description": "has an effect", - "type": "number" + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digitue-gt/digitue_best.mlmodel", + "size": 16364343, + "name": "digitue.mlmodel", + "parameter_usage": "without-extension", + "description": "mostly 19th century German Fraktur; https://github.com/UB-Mannheim/digitue-gt/wiki/Training-with-Kraken" }, - "threshold": { - "default": 0.2, - "description": "has an effect", - "type": "number" + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/digi-gt/luther_best.mlmodel", + "size": 16305851, + "name": "luther.mlmodel", + "parameter_usage": "without-extension", + "description": "16th century German Gothic; https://github.com/UB-Mannheim/digi-gt/wiki/Training" }, - "usegauss": { - "default": false, - "description": "has an effect", - "type": "boolean" + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/kraken/typewriter/typewriter.mlmodel", + "size": 16364780, + "name": "typewriter.mlmodel", + "parameter_usage": "without-extension", + "description": "20th century typewriter http://idb.ub.uni-tuebingen.de/opendigi/walz_1976, pretrained on austriannewspapers.mlmodel" }, - "vscale": { - "default": 1.0, - "description": "has an effect", - "type": "number" + { + "url": "https://zenodo.org/record/2577813/files/en_best.mlmodel?download=1", + "size": 2930723, + "name": "en_best.mlmodel", + "parameter_usage": "without-extension", + "description": "This model has been trained on a large corpus of modern printed English text augmented with ~10000 lines of historical pages" } - }, - "steps": [ - "layout/segmentation/region" ] }, - "ocrd-olena-binarize": { + "ocrd-neat-export": { + "executable": "ocrd-neat-export", + "description": "Convert PAGE-XML to neat-loadable TSV", + "categories": [ + "Format-Conversion" + ], + "steps": [ + "format-conversion" + ], + "input_file_grp": [ + "INPUT" + ], + "output_file_grp": [ + "OUTPUT" + ], + "parameters": { + "iiif_url_template": { + "type": "string", + "description": "URL template for lookup of images via IIIF based on {{ unique_identifier }}, {{ page_id }}, {{ page_no }} and {{ PPN }}. 
'left', 'top', 'right', 'bottom', 'width', and 'height' are replaced by the neat JS.", + "default": "https://content.staatsbibliothek-berlin.de/dc/{{ PPN }}-{{ page_no }}/left,top,width,height/full/0/default.jpg" + }, + "scale_filegrp": { + "type": "string", + "description": "If the OCR was run on images with a different resolution than the 'full' IIIF size, use the images in this file group to scale. Set to empty string to disable", + "default": "" + }, + "noproxy": { + "type": "boolean", + "description": "Disable proxy if set", + "default": true + } + } + }, + "ocrd-neat-import": { + "executable": "ocrd-neat-import", + "description": "Re-integrate TSV into PAGE-XML", + "categories": [ + "Format-Conversion" + ], + "steps": [ + "format-conversion" + ], + "input_file_grp": [ + "PAGE-GRP,TSV-GRP" + ], + "output_file_grp": [ + "OUTPUT" + ], + "parameters": { + "keep_words": { + "type": "boolean", + "description": "After updating the line TextEquiv, remove (false) or keep (true) existing and probably inconsistent pc:Word", + "default": false + } + } + }, + "ocrd-olahd-client": { + "executable": "ocrd-olahd-client", + "description": "Post a workspace to OLA-HD", "categories": [ "Image preprocessing" ], - "description": "popular binarization algorithms implemented by Olena/SCRIBO, wrapped for OCR-D (on page level only)", + "steps": [ + "preprocessing/optimization" + ], + "input_file_grp": [], + "output_file_grp": [], + "parameters": { + "endpoint": { + "description": "URL of the OLA-HD instance", + "type": "string", + "required": true + }, + "strict": { + "description": "Whether to log or raise bagging issues", + "type": "boolean", + "default": true + }, + "username": { + "description": "Username", + "type": "string", + "required": true + }, + "password": { + "description": "Password", + "type": "string", + "required": true + }, + "pid_previous_version": { + "description": "PID of the previous version of this work, already stored in OLA-HD", + "type": "string", + "required": false + } + } + }, + "ocrd-olena-binarize": { + "executable": "ocrd-olena-binarize", + "description": "popular binarization algorithms implemented by Olena/SCRIBO, wrapped for OCR-D (on page level only)", + "categories": [ + "Image preprocessing" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], "input_file_grp": [ "OCR-D-SEG-BLOCK", "OCR-D-SEG-LINE", @@ -2324,15 +2568,10 @@ "OCR-D-SEG-WORD" ], "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", - "format": "float", - "type": "number" - }, "impl": { - "default": "sauvola-ms-split", "description": "The name of the actual binarization algorithm", + "type": "string", + "default": "sauvola-ms-split", "enum": [ "sauvola", "sauvola-ms", @@ -2343,37 +2582,37 @@ "niblack", "singh", "otsu" - ], - "type": "string" + ] }, "k": { - "default": 0.34, "description": "Sauvola's formulae parameter (foreground weight decreases with k); for Multiscale, multiplied to yield default 0.2/0.3/0.5; for Singh, multiplied to yield default 0.06; for Niblack, multiplied to yield default -0.2; for Wolf/Kim, used directly; for Otsu, does not apply", "format": "float", - "type": "number" + "type": "number", + "default": 0.34 }, "win-size": { - "default": 0, "description": "The (odd) window size in pixels; when zero (default), set to DPI (or 301); for Otsu, does not apply", + "type": "number", "format": "integer", - "type": "number" + "default": 0 + }, + "dpi": { + "type": "number", +
"format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 } - }, - "steps": [ - "preprocessing/optimization/binarization" - ] - }, - "ocrd-page2tei": { - "description": "Convert PAGE-XML to TEI-C", - "executable": "ocrd-page2tei", - "parameters": {} + } }, "ocrd-pagetopdf": { + "executable": "ocrd-pagetopdf", + "description": "Convert text and layout annotations to PDF format (overlaying original image with text layer and polygon outlines)", "categories": [ "Long-term preservation" ], - "description": "Convert text and layout annotations to PDF format (overlaying original image with text layer and polygon outlines)", - "executable": "ocrd-pagetopdf", + "steps": [ + "postprocessing/format-conversion" + ], "input_file_grp": [ "OCR-D-OCR-PAGE" ], @@ -2381,43 +2620,56 @@ "OCR-D-OCR-PDF" ], "parameters": { - "ext": { - "default": ".pdf", - "description": "Output filename extension", - "type": "string" - }, "font": { - "content-type": "application/x-font-ttf", - "default": "", "description": "Font file to be used in PDF file. If unset, AletheiaSans.ttf is used. (Make sure to pick a font which covers all glyphs!)", + "type": "string", "format": "uri", - "type": "string" + "content-type": "application/x-font-ttf", + "default": "" }, - "multipage": { + "outlines": { + "description": "What segment hierarchy to draw coordinate outlines for. If unset, no outlines are drawn.", + "type": "string", "default": "", - "description": "Merge all PDFs into one mulitpage file. The value is used as filename for the pdf.", - "type": "string" - }, - "negative2zero": { - "default": false, - "description": "Set all negative box values to 0", - "type": "boolean" + "enum": [ + "", + "region", + "line", + "word", + "glyph" + ] }, - "outlines": { + "textequiv_level": { + "description": "What segment hierarchy level to render text output from. If unset, no text is rendered.", + "type": "string", "default": "", - "description": "What segment hierarchy to draw coordinate outlines for. If unset, no outlines are drawn.", "enum": [ "", "region", "line", "word", "glyph" - ], - "type": "string" + ] + }, + "negative2zero": { + "description": "Set all negative box values to 0", + "type": "boolean", + "default": false + }, + "ext": { + "description": "Output filename extension", + "type": "string", + "default": ".pdf" + }, + "multipage": { + "description": "Merge all PDFs into one mulitpage file. The value is used as filename for the pdf.", + "type": "string", + "default": "" }, "pagelabel": { - "default": "pageId", "description": "Parameter for 'multipage': Set the page information, which will be used as pagelabel. Default is 'pageId', e.g. the option 'pagenumber' will create numbered pagelabel consecutively", + "type": "string", + "default": "pageId", "enum": [ "pagenumber", "pageId", @@ -2426,368 +2678,242 @@ "local_filename", "ID", "url" - ], - "type": "string" + ] }, "script-args": { - "default": "", "description": "Extra arguments to PageToPdf (see https://github.com/PRImA-Research-Lab/prima-page-to-pdf)", - "type": "string" - }, - "textequiv_level": { - "default": "", - "description": "What segment hierarchy level to render text output from. 
If unset, no text is rendered.", - "enum": [ - "", - "region", - "line", - "word", - "glyph" - ], - "type": "string" + "type": "string", + "default": "" } - }, + } + }, + "ocrd-repair-inconsistencies": { + "executable": "ocrd-repair-inconsistencies", + "categories": [ + "Layout analysis" + ], + "description": "Re-order glyphs/words/lines top-down-left-right when textually inconsistent with their parents", + "input_file_grp": [ + "OCR-D-SEG-BLOCK" + ], + "output_file_grp": [ + "OCR-D-SEG-BLOCK-FIXED" + ], "steps": [ - "postprocessing/format-conversion" + "layout/segmentation/line", + "layout/segmentation/word", + "layout/segmentation/glyph" ] }, - "ocrd-pc-segmentation": { + "ocrd-segment-repair": { + "executable": "ocrd-segment-repair", "categories": [ "Layout analysis" ], - "description": "Segment page into regions using a pixel classifier based on a Fully Convolutional Network (FCN)", - "executable": "ocrd-pc-segmentation", + "description": "Analyse and repair region segmentation; at least ensure validity and consistency of coordinates.", "input_file_grp": [ - "OCR-D-IMG-BIN" + "OCR-D-IMG", + "OCR-D-SEG-BLOCK" ], "output_file_grp": [ "OCR-D-SEG-BLOCK" ], + "steps": [ + "layout/segmentation/region" + ], "parameters": { - "gpu_allow_growth": { - "default": false, - "description": "required for GPU use with some graphic cards (set to true, if you get CUDNN_INTERNAL_ERROR)", - "type": "boolean" + "simplify": { + "type": "number", + "format": "float", + "minimum": 0, + "default": 0, + "description": "Distance (in px) used to simplify all segment polygons. (Avoid values larger than xheight/scale, or corners will be chopped off.) Set to 0 to disable." }, - "model": { - "default": "__DEFAULT__", - "description": "trained model for pixel classifier", - "type": "string" + "plausibilize": { + "type": "boolean", + "default": false, + "description": "Identify and remove redundancies on text regions and text lines (deleting/merging/shrinking where overlaps occur)." }, - "overwrite_regions": { - "default": true, - "description": "remove existing layout and text annotation below the Page level", - "type": "boolean" + "plausibilize_merge_min_overlap": { + "type": "number", + "format": "float", + "minimum": 0.0, + "maximum": 1.0, + "default": 0.9, + "description": "When merging a region or line almost contained in another, require at least this ratio of area is shared with the other." }, - "resize_height": { - "default": 300, - "description": "scale down pixelclassifier output to this height for postprocessing (performance/quality tradeoff). Independent of training.", - "type": "integer" + "sanitize": { + "type": "boolean", + "default": false, + "description": "Shrink each region such that its coordinates become the minimal concave hull of its binary foreground. (Assumes that a perfect binarization is available.)" }, - "xheight": { - "default": 8, - "description": "height of character x in pixels used during training", - "type": "integer" + "sanitize_padding": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 5, + "description": "When shrinking a region, enlarge the resulting hull by this amount of pixels in each direction." 
} - }, - "steps": [ - "layout/segmentation/region" - ] + } }, - "ocrd-preprocess-image": { + "ocrd-segment-project": { + "executable": "ocrd-segment-project", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Convert or enhance images", - "executable": "ocrd-preprocess-image", + "description": "Project segment coordinates to their structural parents", "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-PAGE", - "OCR-D-SEG-REGION", - "OCR-D-SEG-LINE" + "OCR-D-SEG-BLOCK" ], "output_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-PAGE", - "OCR-D-SEG-REGION", - "OCR-D-SEG-LINE" + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation" ], "parameters": { - "command": { - "description": "shell command to operate on image files, with @INFILE as place-holder for the input file path, and @OUTFILE as place-holder for the output file path", - "required": true, - "type": "string" - }, - "input_feature_filter": { - "default": "", - "description": "comma-separated list of forbidden image features (e.g. binarized,despeckled)", - "type": "string" - }, - "input_feature_selector": { - "default": "", - "description": "comma-separated list of required image features (e.g. binarized,despeckled)", - "type": "string" - }, - "input_mimetype": { - "default": "image/png", - "description": "File format to save input images to (tool's expected input)", - "enum": [ - "image/bmp", - "application/postscript", - "image/gif", - "image/jpeg", - "image/jp2", - "image/png", - "image/x-portable-pixmap", - "image/tiff" - ], - "type": "string" - }, "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", + "type": "string", "enum": [ "page", + "table", "region", "line", - "word", - "glyph" + "word" ], - "type": "string" - }, - "output_feature_added": { - "description": "image feature(s) to be added after this operation (if multiple, separate by comma)", - "required": true, - "type": "string" + "default": "page", + "description": "hierarchy level which to assign new coordinates to" }, - "output_mimetype": { - "default": "image/png", - "description": "File format to load output images from (tool's expected output)", - "enum": [ - "image/bmp", - "application/postscript", - "image/gif", - "image/jpeg", - "image/jp2", - "image/png", - "image/x-portable-pixmap", - "image/tiff" - ], - "type": "string" + "padding": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 10, + "description": "margin (in px) to extend the hull in every direction" } - }, - "steps": [ - "preprocessing/optimization" - ] + } }, - "ocrd-repair-inconsistencies": { + "ocrd-segment-from-masks": { + "executable": "ocrd-segment-from-masks", "categories": [ "Layout analysis" ], - "description": "Re-order glyphs/words/lines top-down-left-right when textually inconsistent with their parents", - "executable": "ocrd-repair-inconsistencies", + "description": "Import region segmentation from mask images (segments filled with colors encoding classes). Input fileGrp format is `base,mask` (i.e. 
PAGE or original image files first, mask image files second).", "input_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-IMG", + "OCR-D-SEG-PAGE" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK-FIXED" + "OCR-D-SEG-BLOCK" ], "steps": [ - "layout/segmentation/line", - "layout/segmentation/word", - "layout/segmentation/glyph" - ] - }, - "ocrd-sbb-binarize": { - "categories": [ - "Image preprocessing" + "layout/segmentation/region" ], - "description": "Pixelwise binarization with selectional auto-encoders in Keras", - "executable": "ocrd-sbb-binarize", - "input_file_grp": [], - "output_file_grp": [], "parameters": { - "model": { - "content-type": "text/directory", - "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", - "format": "uri", - "required": true, - "type": "string" - }, - "operation_level": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region" - ], - "type": "string" - } - }, - "resources": [ - { - "description": "default models provided by github.com/qurator-spk", - "name": "default", - "path_in_archive": "model_2020_01_16", - "size": 562917559, - "type": "archive", - "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2020_01_16.zip" - }, - { - "description": "updated default models provided by github.com/qurator-spk", - "name": "default-2021-03-09", - "path_in_archive": ".", - "size": 133693693, - "type": "archive", - "url": "https://github.com/apacha/sbb_binarization/releases/download/pre-trained-models/model_2021_03_09.zip" + "colordict": { + "type": "object", + "default": { + "FFFFFF00": "", + "FFFFFFFF": "Border", + "8B4513FF": "TableRegion", + "4682B4FF": "AdvertRegion", + "FF8C00FF": "ChemRegion", + "9400D3FF": "MusicRegion", + "9ACDD2FF": "MapRegion", + "0000FFFF": "TextRegion", + "0000FFFA": "TextRegion:paragraph", + "0000FFF5": "TextRegion:heading", + "0000FFF0": "TextRegion:caption", + "0000FFEB": "TextRegion:header", + "0000FFE6": "TextRegion:footer", + "0000FFE1": "TextRegion:page-number", + "0000FFDC": "TextRegion:drop-capital", + "0000FFD7": "TextRegion:credit", + "0000FFD2": "TextRegion:floating", + "0000FFCD": "TextRegion:signature-mark", + "0000FFC8": "TextRegion:catch-word", + "0000FFC3": "TextRegion:marginalia", + "0000FFBE": "TextRegion:footnote", + "0000FFB9": "TextRegion:footnote-continued", + "0000FFB4": "TextRegion:endnote", + "0000FFAF": "TextRegion:TOC-entry", + "0000FFA5": "TextRegion:list-label", + "0000FFA0": "TextRegion:other", + "800080FF": "ChartRegion", + "800080FA": "ChartRegion:bar", + "800080F5": "ChartRegion:line", + "800080F0": "ChartRegion:pie", + "800080EB": "ChartRegion:scatter", + "800080E6": "ChartRegion:surface", + "800080E1": "ChartRegion:other", + "008000FF": "GraphicRegion", + "008000FA": "GraphicRegion:logo", + "008000F0": "GraphicRegion:letterhead", + "008000EB": "GraphicRegion:decoration", + "008000E6": "GraphicRegion:frame", + "008000E1": "GraphicRegion:handwritten-annotation", + "008000DC": "GraphicRegion:stamp", + "008000D7": "GraphicRegion:signature", + "008000D2": "GraphicRegion:barcode", + "008000CD": "GraphicRegion:paper-grow", + "008000C8": "GraphicRegion:punch-hole", + "008000C3": "GraphicRegion:other", + "00CED1FF": "ImageRegion", + "B8860BFF": "LineDrawingRegion", + "00BFFFFF": "MathsRegion", + "FF0000FF": "NoiseRegion", + "FF00FFFF": "SeparatorRegion", 
+ "646464FF": "UnknownRegion", + "637C81FF": "CustomRegion" + }, + "description": "Mapping from color values in the input masks to region types to annotate; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped colors will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict.json output and colordict parameter of ocrd-segment-extract-pages." } - ], - "steps": [ - "preprocessing/optimization/binarization" - ] + } }, - "ocrd-sbb-textline-detector": { + "ocrd-segment-from-coco": { + "executable": "ocrd-segment-from-coco", "categories": [ "Layout analysis" ], - "description": "Printspace, region and textline detection", - "executable": "ocrd-sbb-textline-detector", + "description": "Import region segmentation from COCO detection format JSON (for all pages). Input fileGrp format is `base,COCO` (i.e. PAGE or original image files first, COCO file second).", "input_file_grp": [ - "OCR-D-IMG" + "OCR-D-IMG", + "OCR-D-SEG-PAGE" ], "output_file_grp": [ - "OCR-D-SBB-SEG-LINE" - ], - "parameters": { - "model": { - "cacheable": true, - "content-type": "text/directory", - "description": "Path to directory containing models to be used (See https://qurator-data.de/sbb_textline_detector/)", - "format": "uri", - "type": "string" - } - }, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ] - }, - "ocrd-segment-evaluate": { - "categories": [ - "Layout analysis" - ], - "description": "Compare segmentations", - "executable": "ocrd-segment-evaluate", - "input_file_grp": [ - "OCR-D-GT-SEG-BLOCK", "OCR-D-SEG-BLOCK" ], - "parameters": { - "for-categories": { - "default": "", - "description": "on region level, only compare these region types (comma-separated list; unless `ignore-subtype` is given, append subtypes via `.`; e.g. `TextRegion.page-number,TextRegion.marginalia`)", - "type": "string" - }, - "ignore-subtype": { - "default": false, - "description": "on region level, ignore @type differentiation (where applicable)", - "type": "boolean" - }, - "level-of-operation": { - "default": "region", - "description": "segment hierarchy level to compare GT and predictions at", - "enum": [ - "region", - "line" - ], - "type": "string" - }, - "only-fg": { - "default": false, - "description": "only overlap and compare the foregrounds in the binarized image", - "type": "boolean" - } - }, "steps": [ - "layout/analysis" - ] + "layout/segmentation/region" + ], + "parameters": {} }, - "ocrd-segment-extract-glyphs": { + "ocrd-segment-extract-pages": { + "executable": "ocrd-segment-extract-pages", "categories": [ "Image preprocessing" ], - "description": "Extract glyph segmentation as glyph images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", - "executable": "ocrd-segment-extract-glyphs", + "description": "Extract page segmentation as page images (deskewed according to `/Page/@orientation` and cropped+masked along `/Page/Border`) + JSON (including region coordinates/classes and meta-data), as binarized images, and as mask images (segments filled with colors encoding classes) + COCO detection format JSON (for all pages). Output fileGrp format is `raw[,binarized[,mask]]` (i.e. 
fall back to first group).", "input_file_grp": [ - "OCR-D-SEG-GLYPH", - "OCR-D-GT-SEG-GLYPH" + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE", + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" ], "output_file_grp": [ - "OCR-D-IMG-GLYPH" + "OCR-D-IMG-PAGE" ], - "parameters": { - "feature_filter": { - "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`).", - "type": "string" - }, - "mimetype": { - "default": "image/png", - "description": "File format to save extracted images in.", - "enum": [ - "image/bmp", - "application/postscript", - "image/gif", - "image/jpeg", - "image/jp2", - "image/png", - "image/x-portable-pixmap", - "image/tiff" - ], - "type": "string" - }, - "transparency": { - "default": true, - "description": "Add alpha channels with segment masks to the images", - "type": "boolean" - } - }, "steps": [ "layout/analysis" - ] - }, - "ocrd-segment-extract-lines": { - "categories": [ - "Image preprocessing" - ], - "description": "Extract line segmentation as line images + text file + JSON.", - "executable": "ocrd-segment-extract-lines", - "input_file_grp": [ - "OCR-D-SEG-LINE", - "OCR-D-GT-SEG-LINE" - ], - "output_file_grp": [ - "OCR-D-IMG-LINE" ], "parameters": { "feature_filter": { + "type": "string", "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`).", - "type": "string" - }, - "library-convention": { - "default": "none", - "description": "For xlsx extraction, to make line images hyperlinked, use this scheme in reconstructing presentation URLs of original pages. Libraries have different conventions in their METS files. Set to none to disable.", - "enum": [ - "slub", - "sbb", - "none" - ], - "type": "string" + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." }, "mimetype": { - "default": "image/png", - "description": "File format to save extracted images in.", + "type": "string", "enum": [ "image/bmp", "application/postscript", @@ -2798,195 +2924,109 @@ "image/x-portable-pixmap", "image/tiff" ], - "type": "string" - }, - "min-line-height": { - "default": 1, - "description": "Only extract lines that are at least this high (in px).", - "format": "integer", - "minimum": 1, - "type": "number" + "default": "image/png", + "description": "File format to save extracted images in." }, - "min-line-length": { - "default": 0, - "description": "Only extract lines with at least this many characters.", - "format": "integer", - "minimum": 0, - "type": "number" + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" }, - "min-line-width": { - "default": 1, - "description": "Only extract lines that are at least this wide (in px).", - "format": "integer", - "minimum": 1, - "type": "number" + "plot_overlay": { + "type": "boolean", + "default": false, + "description": "When generating mask images with `plot_segmasks`, instead of starting with a blank image and having layers and segments replace each other, start with the raw image and superimpose (alpha-composite) layers and segments." 
}, - "output-types": { - "default": [ - "text", - "json", - "xlsx" - ], - "description": "What kind of files to extract besides the line image itself (text/json files for each line, xlsx per page).", + "plot_segmasks": { + "type": "array", "items": { + "type": "string", "enum": [ - "text", - "json", - "xlsx" - ], - "type": "string" + "order", + "page", + "region", + "line", + "word", + "glyph" + ] }, - "type": "array" + "default": [ + "region" + ], + "description": "Generate mask images of the page segmentation in the last output fileGrp. Draw filled polygons for each specified PAGE hierarchy level in the list (in that order), where 'page' denotes the Border polygon, 'region' denotes Region types, 'line' denotes TextLine, 'word' denotes Word and 'glyph' denotes Glyph. Each type must be mapped in `colordict`. Where neighbors of the same type intersect, show a warning (unless `plot_overlay` is true). If 'order' is present, then draw arrows for reading order, too." }, - "transparency": { - "default": true, - "description": "Add alpha channels with segment masks to the images", - "type": "boolean" - } - }, - "steps": [ - "layout/analysis" - ] - }, - "ocrd-segment-extract-pages": { - "categories": [ - "Image preprocessing" - ], - "description": "Extract page segmentation as page images (deskewed according to `/Page/@orientation` and cropped+masked along `/Page/Border`) + JSON (including region coordinates/classes and meta-data), as binarized images, and as mask images (segments filled with colors encoding classes) + COCO detection format JSON (for all pages). Output fileGrp format is `raw[,binarized[,mask]]` (i.e. fall back to first group).", - "executable": "ocrd-segment-extract-pages", - "input_file_grp": [ - "OCR-D-SEG-PAGE", - "OCR-D-GT-SEG-PAGE", - "OCR-D-SEG-BLOCK", - "OCR-D-GT-SEG-BLOCK" - ], - "output_file_grp": [ - "OCR-D-IMG-PAGE" - ], - "parameters": { "colordict": { + "type": "object", "default": { "": "FFFFFF00", - "AdvertRegion": "4682B4FF", + "ReadingOrderLevel0": "DC143CFF", + "ReadingOrderLevel1": "9400D3FF", + "ReadingOrderLevelN": "8B0000FF", "Border": "FFFFFFFF", + "TableRegion": "8B4513FF", + "AdvertRegion": "4682B4FF", + "ChemRegion": "FF8C00FF", + "MusicRegion": "9400D3FF", + "MapRegion": "9ACDD2FF", + "TextRegion": "0000FFFF", + "TextRegion:paragraph": "0000FFFA", + "TextRegion:heading": "0000FFF5", + "TextRegion:caption": "0000FFF0", + "TextRegion:header": "0000FFEB", + "TextRegion:footer": "0000FFE6", + "TextRegion:page-number": "0000FFE1", + "TextRegion:drop-capital": "0000FFDC", + "TextRegion:credit": "0000FFD7", + "TextRegion:floating": "0000FFD2", + "TextRegion:signature-mark": "0000FFCD", + "TextRegion:catch-word": "0000FFC8", + "TextRegion:marginalia": "0000FFC3", + "TextRegion:footnote": "0000FFBE", + "TextRegion:footnote-continued": "0000FFB9", + "TextRegion:endnote": "0000FFB4", + "TextRegion:TOC-entry": "0000FFAF", + "TextRegion:list-label": "0000FFA5", + "TextRegion:other": "0000FFA0", "ChartRegion": "800080FF", "ChartRegion:bar": "800080FA", "ChartRegion:line": "800080F5", - "ChartRegion:other": "800080E1", "ChartRegion:pie": "800080F0", "ChartRegion:scatter": "800080EB", "ChartRegion:surface": "800080E6", - "ChemRegion": "FF8C00FF", - "CustomRegion": "637C81FF", - "Glyph": "2E8B08FF", + "ChartRegion:other": "800080E1", "GraphicRegion": "008000FF", - "GraphicRegion:barcode": "008000D2", + "GraphicRegion:logo": "008000FA", + "GraphicRegion:letterhead": "008000F0", "GraphicRegion:decoration": "008000EB", "GraphicRegion:frame": "008000E6", 
"GraphicRegion:handwritten-annotation": "008000E1", - "GraphicRegion:letterhead": "008000F0", - "GraphicRegion:logo": "008000FA", - "GraphicRegion:other": "008000C3", + "GraphicRegion:stamp": "008000DC", + "GraphicRegion:signature": "008000D7", + "GraphicRegion:barcode": "008000D2", "GraphicRegion:paper-grow": "008000CD", "GraphicRegion:punch-hole": "008000C8", - "GraphicRegion:signature": "008000D7", - "GraphicRegion:stamp": "008000DC", + "GraphicRegion:other": "008000C3", "ImageRegion": "00CED1FF", "LineDrawingRegion": "B8860BFF", - "MapRegion": "9ACDD2FF", "MathsRegion": "00BFFFFF", - "MusicRegion": "9400D3FF", "NoiseRegion": "FF0000FF", - "ReadingOrderLevel0": "DC143CFF", - "ReadingOrderLevel1": "9400D3FF", - "ReadingOrderLevelN": "8B0000FF", "SeparatorRegion": "FF00FFFF", - "TableRegion": "8B4513FF", - "TextLine": "32CD32FF", - "TextRegion": "0000FFFF", - "TextRegion:TOC-entry": "0000FFAF", - "TextRegion:caption": "0000FFF0", - "TextRegion:catch-word": "0000FFC8", - "TextRegion:credit": "0000FFD7", - "TextRegion:drop-capital": "0000FFDC", - "TextRegion:endnote": "0000FFB4", - "TextRegion:floating": "0000FFD2", - "TextRegion:footer": "0000FFE6", - "TextRegion:footnote": "0000FFBE", - "TextRegion:footnote-continued": "0000FFB9", - "TextRegion:header": "0000FFEB", - "TextRegion:heading": "0000FFF5", - "TextRegion:list-label": "0000FFA5", - "TextRegion:marginalia": "0000FFC3", - "TextRegion:other": "0000FFA0", - "TextRegion:page-number": "0000FFE1", - "TextRegion:paragraph": "0000FFFA", - "TextRegion:signature-mark": "0000FFCD", "UnknownRegion": "646464FF", - "Word": "B22222FF" - }, - "description": "Mapping from segment types to extract to color values in the output mask images and COCO; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped region types will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict parameter of ocrd-segment-from-masks.", - "type": "object" - }, - "feature_filter": { - "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`).", - "type": "string" - }, - "mimetype": { - "default": "image/png", - "description": "File format to save extracted images in.", - "enum": [ - "image/bmp", - "application/postscript", - "image/gif", - "image/jpeg", - "image/jp2", - "image/png", - "image/x-portable-pixmap", - "image/tiff" - ], - "type": "string" - }, - "plot_overlay": { - "default": false, - "description": "When generating mask images with `plot_segmasks`, instead of starting with a blank image and having layers and segments replace each other, start with the raw image and superimpose (alpha-composite) layers and segments.", - "type": "boolean" - }, - "plot_segmasks": { - "default": [ - "region" - ], - "description": "Generate mask images of the page segmentation in the last output fileGrp. Draw filled polygons for each specified PAGE hierarchy level in the list (in that order), where 'page' denotes the Border polygon, 'region' denotes Region types, 'line' denotes TextLine, 'word' denotes Word and 'glyph' denotes Glyph. Each type must be mapped in `colordict`. Where neighbors of the same type intersect, show a warning (unless `plot_overlay` is true). 
If 'order' is present, then draw arrows for reading order, too.", - "items": { - "enum": [ - "order", - "page", - "region", - "line", - "word", - "glyph" - ], - "type": "string" + "CustomRegion": "637C81FF", + "TextLine": "32CD32FF", + "Word": "B22222FF", + "Glyph": "2E8B08FF" }, - "type": "array" - }, - "transparency": { - "default": true, - "description": "Add alpha channels with segment masks to the images", - "type": "boolean" + "description": "Mapping from segment types to extract to color values in the output mask images and COCO; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped region types will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict parameter of ocrd-segment-from-masks." } - }, - "steps": [ - "layout/analysis" - ] + } }, "ocrd-segment-extract-regions": { + "executable": "ocrd-segment-extract-regions", "categories": [ "Image preprocessing" ], "description": "Extract region segmentation as region images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon) + JSON (including region coordinates/classes and meta-data).", - "executable": "ocrd-segment-extract-regions", "input_file_grp": [ "OCR-D-SEG-BLOCK", "OCR-D-GT-SEG-BLOCK" @@ -2994,27 +3034,19 @@ "output_file_grp": [ "OCR-D-IMG-REGION" ], + "steps": [ + "layout/analysis" + ], "parameters": { + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." + }, "classes": { - "default": [ - "NoiseRegion", - "LineDrawingRegion", - "AdvertRegion", - "ImageRegion", - "ChartRegion", - "MusicRegion", - "GraphicRegion", - "UnknownRegion", - "CustomRegion", - "SeparatorRegion", - "MathsRegion", - "TextRegion", - "MapRegion", - "ChemRegion", - "TableRegion" - ], - "description": "Array of region types to extract e.g. -P classes '[\"TextRegion\", \"TableRegion\", \"ImageRegion\"]' . If empty, all regions are allowed.", + "type": "array", "items": { + "type": "string", "enum": [ "NoiseRegion", "LineDrawingRegion", @@ -3031,19 +3063,29 @@ "MapRegion", "ChemRegion", "TableRegion" - ], - "type": "string" + ] }, - "type": "array" - }, - "feature_filter": { - "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`).", - "type": "string" + "default": [ + "NoiseRegion", + "LineDrawingRegion", + "AdvertRegion", + "ImageRegion", + "ChartRegion", + "MusicRegion", + "GraphicRegion", + "UnknownRegion", + "CustomRegion", + "SeparatorRegion", + "MathsRegion", + "TextRegion", + "MapRegion", + "ChemRegion", + "TableRegion" + ], + "description": "Array of region types to extract e.g. -P classes '[\"TextRegion\", \"TableRegion\", \"ImageRegion\"]' . If empty, all regions are allowed." }, "mimetype": { - "default": "image/png", - "description": "File format to save extracted images in.", + "type": "string", "enum": [ "image/bmp", "application/postscript", @@ -3054,40 +3096,40 @@ "image/x-portable-pixmap", "image/tiff" ], - "type": "string" + "default": "image/png", + "description": "File format to save extracted images in." 
}, "transparency": { + "type": "boolean", "default": true, - "description": "Add alpha channels with segment masks to the images", - "type": "boolean" + "description": "Add alpha channels with segment masks to the images" } - }, - "steps": [ - "layout/analysis" - ] + } }, - "ocrd-segment-extract-words": { + "ocrd-segment-extract-lines": { + "executable": "ocrd-segment-extract-lines", "categories": [ "Image preprocessing" ], - "description": "Extract word segmentation as word images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", - "executable": "ocrd-segment-extract-words", + "description": "Extract line segmentation as line images + text file + JSON.", "input_file_grp": [ - "OCR-D-SEG-WORD", - "OCR-D-GT-SEG-WORD" + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE" ], "output_file_grp": [ - "OCR-D-IMG-WORD" + "OCR-D-IMG-LINE" + ], + "steps": [ + "layout/analysis" ], "parameters": { "feature_filter": { + "type": "string", "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`).", - "type": "string" + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." }, "mimetype": { - "default": "image/png", - "description": "File format to save extracted images in.", + "type": "string", "enum": [ "image/bmp", "application/postscript", @@ -3098,207 +3140,177 @@ "image/x-portable-pixmap", "image/tiff" ], - "type": "string" + "default": "image/png", + "description": "File format to save extracted images in." }, "transparency": { + "type": "boolean", "default": true, - "description": "Add alpha channels with segment masks to the images", - "type": "boolean" - } - }, - "steps": [ - "layout/analysis" - ] - }, - "ocrd-segment-from-coco": { - "categories": [ - "Layout analysis" - ], - "description": "Import region segmentation from COCO detection format JSON (for all pages). Input fileGrp format is `base,COCO` (i.e. PAGE or original image files first, COCO file second).", - "executable": "ocrd-segment-from-coco", - "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-PAGE" - ], - "output_file_grp": [ - "OCR-D-SEG-BLOCK" - ], - "parameters": {}, - "steps": [ - "layout/segmentation/region" - ] - }, - "ocrd-segment-from-masks": { - "categories": [ - "Layout analysis" - ], - "description": "Import region segmentation from mask images (segments filled with colors encoding classes). Input fileGrp format is `base,mask` (i.e. 
PAGE or original image files first, mask image files second).", - "executable": "ocrd-segment-from-masks", - "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-PAGE" - ], - "output_file_grp": [ - "OCR-D-SEG-BLOCK" - ], - "parameters": { - "colordict": { - "default": { - "0000FFA0": "TextRegion:other", - "0000FFA5": "TextRegion:list-label", - "0000FFAF": "TextRegion:TOC-entry", - "0000FFB4": "TextRegion:endnote", - "0000FFB9": "TextRegion:footnote-continued", - "0000FFBE": "TextRegion:footnote", - "0000FFC3": "TextRegion:marginalia", - "0000FFC8": "TextRegion:catch-word", - "0000FFCD": "TextRegion:signature-mark", - "0000FFD2": "TextRegion:floating", - "0000FFD7": "TextRegion:credit", - "0000FFDC": "TextRegion:drop-capital", - "0000FFE1": "TextRegion:page-number", - "0000FFE6": "TextRegion:footer", - "0000FFEB": "TextRegion:header", - "0000FFF0": "TextRegion:caption", - "0000FFF5": "TextRegion:heading", - "0000FFFA": "TextRegion:paragraph", - "0000FFFF": "TextRegion", - "008000C3": "GraphicRegion:other", - "008000C8": "GraphicRegion:punch-hole", - "008000CD": "GraphicRegion:paper-grow", - "008000D2": "GraphicRegion:barcode", - "008000D7": "GraphicRegion:signature", - "008000DC": "GraphicRegion:stamp", - "008000E1": "GraphicRegion:handwritten-annotation", - "008000E6": "GraphicRegion:frame", - "008000EB": "GraphicRegion:decoration", - "008000F0": "GraphicRegion:letterhead", - "008000FA": "GraphicRegion:logo", - "008000FF": "GraphicRegion", - "00BFFFFF": "MathsRegion", - "00CED1FF": "ImageRegion", - "4682B4FF": "AdvertRegion", - "637C81FF": "CustomRegion", - "646464FF": "UnknownRegion", - "800080E1": "ChartRegion:other", - "800080E6": "ChartRegion:surface", - "800080EB": "ChartRegion:scatter", - "800080F0": "ChartRegion:pie", - "800080F5": "ChartRegion:line", - "800080FA": "ChartRegion:bar", - "800080FF": "ChartRegion", - "8B4513FF": "TableRegion", - "9400D3FF": "MusicRegion", - "9ACDD2FF": "MapRegion", - "B8860BFF": "LineDrawingRegion", - "FF0000FF": "NoiseRegion", - "FF00FFFF": "SeparatorRegion", - "FF8C00FF": "ChemRegion", - "FFFFFF00": "", - "FFFFFFFF": "Border" + "description": "Add alpha channels with segment masks to the images" + }, + "output-types": { + "type": "array", + "default": [ + "text", + "json", + "xlsx" + ], + "items": { + "type": "string", + "enum": [ + "text", + "json", + "xlsx" + ] }, - "description": "Mapping from color values in the input masks to region types to annotate; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped colors will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict.json output and colordict parameter of ocrd-segment-extract-pages.", - "type": "object" + "description": "What kind of files to extract besides the line image itself (text/json files for each line, xlsx per page)." + }, + "library-convention": { + "type": "string", + "enum": [ + "slub", + "sbb", + "none" + ], + "default": "none", + "description": "For xlsx extraction, to make line images hyperlinked, use this scheme in reconstructing presentation URLs of original pages. Libraries have different conventions in their METS files. Set to none to disable." + }, + "min-line-length": { + "type": "number", + "format": "integer", + "minimum": 0, + "default": 0, + "description": "Only extract lines with at least this many characters." 
+ }, + "min-line-width": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 1, + "description": "Only extract lines that are at least this wide (in px)." + }, + "min-line-height": { + "type": "number", + "format": "integer", + "minimum": 1, + "default": 1, + "description": "Only extract lines that are at least this high (in px)." + }, + "textequiv-index": { + "type": "string", + "enum": [ + "first", + "last", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9" + ], + "default": "first", + "description": "Only extract lines with the specified TextEquiv/@index entries; 'first' and 'last' denote the first and last TextEquiv elements, regardless of their @index, respectively." } - }, - "steps": [ - "layout/segmentation/region" - ] + } }, - "ocrd-segment-project": { + "ocrd-segment-extract-words": { + "executable": "ocrd-segment-extract-words", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Project segment coordinates to their structural parents", - "executable": "ocrd-segment-project", + "description": "Extract word segmentation as word images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", "input_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-SEG-WORD", + "OCR-D-GT-SEG-WORD" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-IMG-WORD" + ], + "steps": [ + "layout/analysis" ], "parameters": { - "level-of-operation": { - "default": "page", - "description": "hierarchy level which to assign new coordinates to", + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." + }, + "mimetype": { + "type": "string", "enum": [ - "page", - "table", - "region", - "line", - "word" + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" ], - "type": "string" + "default": "image/png", + "description": "File format to save extracted images in." 
}, - "padding": { - "default": 10, - "description": "margin (in px) to extend the hull in every direction", - "format": "integer", - "minimum": 0, - "type": "number" + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" } - }, - "steps": [ - "layout/segmentation" - ] + } }, - "ocrd-segment-repair": { + "ocrd-segment-extract-glyphs": { + "executable": "ocrd-segment-extract-glyphs", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Analyse and repair region segmentation; at least ensure validity and consistency of coordinates.", - "executable": "ocrd-segment-repair", + "description": "Extract glyph segmentation as glyph images (deskewed according to `*/@orientation` and cropped+masked along `*/Coords` polygon and dewarped as in `*/AlternativeImage`) + text file (according to `*/TextEquiv`) + JSON (including line coordinates and meta-data).", "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-BLOCK" + "OCR-D-SEG-GLYPH", + "OCR-D-GT-SEG-GLYPH" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-IMG-GLYPH" + ], + "steps": [ + "layout/analysis" ], "parameters": { - "plausibilize": { - "default": false, - "description": "Identify and remove redundancies on text regions and text lines (deleting/merging/shrinking where overlaps occur).", - "type": "boolean" - }, - "plausibilize_merge_min_overlap": { - "default": 0.9, - "description": "When merging a region or line almost contained in another, require at least this ratio of area is shared with the other.", - "format": "float", - "maximum": 1.0, - "minimum": 0.0, - "type": "number" - }, - "sanitize": { - "default": false, - "description": "Shrink each region such that its coordinates become the minimal concave hull of its binary foreground. (Assumes that a perfect binarization is available.)", - "type": "boolean" + "feature_filter": { + "type": "string", + "default": "", + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)." }, - "sanitize_padding": { - "default": 5, - "description": "When shrinking a region, enlarge the resulting hull by this amount of pixels in each direction.", - "format": "integer", - "minimum": 1, - "type": "number" + "mimetype": { + "type": "string", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "default": "image/png", + "description": "File format to save extracted images in." }, - "simplify": { - "default": 0, - "description": "Distance (in px) used to simplify all segment polygons. (Avoid values larger than xheight/scale, or corners will be chopped off.) 
Set to 0 to disable.", - "format": "float", - "minimum": 0, - "type": "number" + "transparency": { + "type": "boolean", + "default": true, + "description": "Add alpha channels with segment masks to the images" } - }, - "steps": [ - "layout/segmentation/region" - ] + } }, "ocrd-segment-replace-original": { + "executable": "ocrd-segment-replace-original", "categories": [ "Image preprocessing" ], "description": "Extract page image (deskewed according to `/Page/@orientation` and cropped+masked along `/Page/Border`) and use it as @imageFilename, adjusting all coordinates", - "executable": "ocrd-segment-replace-original", "input_file_grp": [ "OCR-D-SEG-LINE", "OCR-D-GT-SEG-LINE", @@ -3307,33 +3319,33 @@ "output_file_grp": [ "OCR-D-SEG-CROP" ], + "steps": [ + "layout/analysis" + ], "parameters": { - "feature_filter": { + "feature_selector": { + "type": "string", "default": "", - "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)", - "type": "string" + "description": "Comma-separated list of required image features (e.g. `binarized,despeckled`)" }, - "feature_selector": { + "feature_filter": { + "type": "string", "default": "", - "description": "Comma-separated list of required image features (e.g. `binarized,despeckled`)", - "type": "string" + "description": "Comma-separated list of forbidden image features (e.g. `binarized,despeckled`)" }, "transform_coordinates": { + "type": "boolean", "default": true, - "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the chosen image again (vital after cropping, deskewing etc; disable only if input coordinates must be assumed to be inconsistent with the original)", - "type": "boolean" + "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the chosen image again (vital after cropping, deskewing etc; disable only if input coordinates must be assumed to be inconsistent with the original)" } - }, - "steps": [ - "layout/analysis" - ] + } }, "ocrd-segment-replace-page": { + "executable": "ocrd-segment-replace-page", "categories": [ "Image preprocessing" ], "description": "Replace everything below page level with another annotation, adjusting all coordinates", - "executable": "ocrd-segment-replace-page", "input_file_grp": [ "OCR-D-SEG-LINE", "OCR-D-GT-SEG-LINE", @@ -3343,949 +3355,1167 @@ "OCR-D-SEG-LINE", "OCR-D-OCR" ], + "steps": [ + "layout/analysis" + ], "parameters": { "transform_coordinates": { + "type": "boolean", "default": true, - "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the first input file group (vital after cropping, deskewing etc; disable only if input coordinates can be assumed to be consistent with the second input file group)", - "type": "boolean" + "description": "re-calculate coordinates for all segments of the structural hierarchy to be consistent with the coordinate system of the first input file group (vital after cropping, deskewing etc; disable only if input coordinates can be assumed to be consistent with the second input file group)" } - }, - "steps": [ - "layout/analysis" - ] + } }, "ocrd-segment-replace-text": { + "executable": "ocrd-segment-replace-text", "categories": [ "Text recognition and optimization" ], "description": "Insert text from annotations in single-segment text files", - "executable": "ocrd-segment-replace-text", + "steps": [ + 
"recognition/post-correction" + ], "parameters": { "file_glob": { + "type": "string", "default": "*.gt.txt", - "description": "glob expression which expands to file names to match against page IDs and segment IDs in order to be ingested", - "type": "string" + "description": "glob expression which expands to file names to match against page IDs and segment IDs in order to be ingested" } - }, - "steps": [ - "recognition/post-correction" - ] + } }, - "ocrd-skimage-binarize": { + "ocrd-segment-evaluate": { + "executable": "ocrd-segment-evaluate", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Binarize images with Scikit-image", - "executable": "ocrd-skimage-binarize", + "description": "Compare segmentations", "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-PAGE", - "OCR-D-SEG-REGION", - "OCR-D-SEG-LINE" + "OCR-D-GT-SEG-BLOCK", + "OCR-D-SEG-BLOCK" ], - "output_file_grp": [ - "OCR-D-IMG-BIN", - "OCR-D-SEG-PAGE-BIN", - "OCR-D-SEG-REGION-BIN", - "OCR-D-SEG-LINE-BIN" + "steps": [ + "layout/analysis" ], "parameters": { - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", - "format": "float", - "type": "number" - }, - "k": { - "default": 0.34, - "description": "For Sauvola/Niblack, formula parameter influencing the threshold bias; larger is lighter foreground", - "format": "float", - "type": "number" - }, "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", + "type": "string", "enum": [ - "page", "region", - "line", - "word", - "glyph" + "line" ], - "type": "string" + "default": "region", + "description": "segment hierarchy level to compare GT and predictions at" }, - "method": { - "default": "sauvola", - "description": "Thresholding algorithm to use", - "enum": [ - "sauvola", - "niblack", - "otsu", - "gauss", - "yen", - "li" - ], - "type": "string" + "only-fg": { + "type": "boolean", + "default": false, + "description": "only overlap and compare the foregrounds in the binarized image" }, - "window_size": { - "default": 0, - "description": "For Sauvola/Niblack/Gauss, the (odd) window size in pixels; when zero (default), set to DPI", - "format": "integer", - "type": "number" + "ignore-subtype": { + "type": "boolean", + "default": false, + "description": "on region level, ignore @type differentiation (where applicable)" + }, + "for-categories": { + "type": "string", + "default": "", + "description": "on region level, only compare these region types (comma-separated list; unless `ignore-subtype` is given, append subtypes via `.`; e.g. 
`TextRegion.page-number,TextRegion.marginalia`)" } - }, - "steps": [ - "preprocessing/optimization/binarization" - ] + } }, - "ocrd-skimage-denoise": { + "ocrd-tesserocr-deskew": { + "executable": "ocrd-tesserocr-deskew", "categories": [ "Image preprocessing" ], - "description": "Denoise binarized images with Scikit-image", - "executable": "ocrd-skimage-denoise", + "description": "Detect script, orientation and skew angle for pages or regions", "input_file_grp": [ - "OCR-D-IMG-BIN", - "OCR-D-SEG-PAGE-BIN", - "OCR-D-SEG-REGION-BIN", - "OCR-D-SEG-LINE-BIN" + "OCR-D-IMG", + "OCR-D-SEG-BLOCK" ], "output_file_grp": [ - "OCR-D-IMG-DEN", - "OCR-D-SEG-PAGE-DEN", - "OCR-D-SEG-REGION-DEN", - "OCR-D-SEG-LINE-DEN" + "OCR-D-DESKEW-BLOCK" + ], + "steps": [ + "preprocessing/optimization/deskewing" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", + "operation_level": { + "type": "string", "enum": [ "page", "region", - "line", - "word", - "glyph" + "line" ], - "type": "string" + "default": "region", + "description": "PAGE XML hierarchy level to operate on" }, - "maxsize": { - "default": 1.0, - "description": "maximum component size of (bg holes or fg specks) noise in pt", + "min_orientation_confidence": { + "type": "number", "format": "float", - "type": "number" - }, - "protect": { - "default": 0.0, - "description": "avoid removing fg specks near larger fg components by up to this distance in pt", + "default": 1.5, + "description": "Minimum confidence score to apply orientation as detected by OSD" + } + } + }, + "ocrd-tesserocr-fontshape": { + "executable": "ocrd-tesserocr-fontshape", + "categories": [ + "Text recognition and optimization" + ], + "description": "Recognize font shapes (family/monospace/bold/italic) and size in segmented words with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons), annotating TextStyle", + "input_file_grp": [ + "OCR-D-SEG-WORD", + "OCR-D-OCR" + ], + "output_file_grp": [ + "OCR-D-OCR-FONTSTYLE" + ], + "steps": [ + "recognition/font-identification" + ], + "parameters": { + "dpi": { + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Number of background-filled pixels to add around the word image (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) on each side before recognition." + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "default": "osd", + "description": "tessdata model to apply (an ISO 639-3 language specification or some other basename, e.g. 
deu-frak or osd); must be an old (pre-LSTM) model" } - }, - "steps": [ - "preprocessing/optimization/despeckling" - ] + } }, - "ocrd-skimage-denoise-raw": { + "ocrd-tesserocr-recognize": { + "executable": "ocrd-tesserocr-recognize", "categories": [ - "Image preprocessing" + "Text recognition and optimization" ], - "description": "Denoise raw images with Scikit-image", - "executable": "ocrd-skimage-denoise-raw", + "description": "Segment and/or recognize text with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons) on any level of the PAGE hierarchy.", "input_file_grp": [ - "OCR-D-IMG", "OCR-D-SEG-PAGE", "OCR-D-SEG-REGION", - "OCR-D-SEG-LINE" + "OCR-D-SEG-TABLE", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD" ], "output_file_grp": [ - "OCR-D-IMG-DEN", - "OCR-D-SEG-PAGE-DEN", - "OCR-D-SEG-REGION-DEN", - "OCR-D-SEG-LINE-DEN" + "OCR-D-SEG-REGION", + "OCR-D-SEG-TABLE", + "OCR-D-SEG-LINE", + "OCR-D-SEG-WORD", + "OCR-D-SEG-GLYPH", + "OCR-D-OCR-TESS" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line", + "recognition/text-recognition" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", + "padding": { + "type": "number", + "format": "integer", + "default": 0, + "description": "Extend detected region/cell/line/word rectangles by this many (true) pixels, or extend existing region/line/word images (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) by this many (background/white) pixels on each side before recognition." + }, + "segmentation_level": { + "type": "string", "enum": [ - "page", "region", + "cell", "line", "word", - "glyph" + "glyph", + "none" ], - "type": "string" + "default": "word", + "description": "Highest PAGE XML hierarchy level to remove existing annotation from and detect segments for (before iterating downwards); if ``none``, does not attempt any new segmentation; if ``cell``, starts at table regions, detecting text regions (cells). Ineffective when lower than ``textequiv_level``." }, - "method": { - "default": "VisuShrink", - "description": "Wavelet filtering scheme to use", + "textequiv_level": { + "type": "string", "enum": [ - "BayesShrink", - "VisuShrink" + "region", + "cell", + "line", + "word", + "glyph", + "none" ], - "type": "string" + "default": "word", + "description": "Lowest PAGE XML hierarchy level to re-use or detect segments for and add the TextEquiv results to (before projecting upwards); if ``none``, adds segmentation down to the glyph level, but does not attempt recognition at all; if ``cell``, stops short before text lines, adding text of text regions inside tables (cells) or on page level only." + }, + "overwrite_segments": { + "type": "boolean", + "default": false, + "description": "If ``segmentation_level`` is not none, but an element already contains segments, remove them and segment again. Otherwise use the existing segments of that element." 
+ }, + "overwrite_text": { + "type": "boolean", + "default": true, + "description": "If ``textequiv_level`` is not none, but a segment already contains TextEquivs, remove them and replace with recognised text. Otherwise add new text as alternative. (Only the first entry is projected upwards.)" + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting any segments, annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols." + }, + "block_polygons": { + "type": "boolean", + "default": false, + "description": "When detecting regions, annotate polygon coordinates instead of bounding box rectangles by querying Tesseract accordingly." + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "When detecting regions, recognise tables as table regions (Tesseract's ``textord_tabfind_find_tables=1``)." + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." + }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "When detecting regions, use 'sparse text' page segmentation mode (finding as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space." + }, + "raw_lines": { + "type": "boolean", + "default": false, + "description": "When detecting lines, do not attempt additional segmentation (baseline+xheight+ascenders/descenders prediction) on line images. Can increase accuracy for certain workflows. Disable when line segments/images may contain components of more than 1 line, or larger gaps/white-spaces." + }, + "char_whitelist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow exclusively; overruled by blacklist if set." + }, + "char_blacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to suppress; overruled by unblacklist if set." + }, + "char_unblacklist": { + "type": "string", + "default": "", + "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow inclusively." + }, + "tesseract_parameters": { + "type": "object", + "default": {}, + "description": "Dictionary of additional Tesseract runtime variables (cf. tesseract --print-parameters), string values." + }, + "xpath_parameters": { + "type": "object", + "default": {}, + "description": "Set additional Tesseract runtime variables according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) (Example: {'ancestor::TextRegion/@type=\"page-number\"': {'char_whitelist': '0123456789-'}, 'contains(@custom,\"ISBN\")': {'char_whitelist': '0123456789-'}})" + }, + "xpath_model": { + "type": "object", + "default": {}, + "description": "Prefer models mapped according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) If no queries / mappings match (or under the default empty parameter), then fall back to `model`. If there are multiple matches, combine their results. 
(Example: {'starts-with(@script,\"Latn\")': 'Latin', 'starts-with(@script,\"Grek\")': 'Greek', '@language=\"Latin\"': 'lat', '@language=\"Greek\"': 'grc+ell', 'ancestor::TextRegion/@type=\"page-number\"': 'eng'})" + }, + "auto_model": { + "type": "boolean", + "default": false, + "description": "Prefer models performing best (by confidence) per segment (if multiple given in `model`). Repeats the OCR of the best model once (i.e. slower). (Use as a fallback to xpath_model if you do not trust script/language detection.)" + }, + "model": { + "type": "string", + "format": "uri", + "content-type": "application/octet-stream", + "description": "The tessdata text recognition model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or Fraktur)." + }, + "oem": { + "type": "string", + "enum": [ + "TESSERACT_ONLY", + "LSTM_ONLY", + "TESSERACT_LSTM_COMBINED", + "DEFAULT" + ], + "default": "DEFAULT", + "description": "Tesseract OCR engine mode to use:\n* Run Tesseract only - fastest,\n* Run just the LSTM line recognizer. (>=v4.00),\n*Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (>=v4.00),\n*Run both and combine results - best accuracy." } }, - "steps": [ - "preprocessing/optimization/despeckling" + "resource_locations": [ + "module" + ], + "resources": [ + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/Fraktur_5000000/tessdata_fast/Fraktur_50000000.334_450937.traineddata", + "name": "Fraktur_GT4HistOCR.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model trained on GT4HistOCR", + "size": 1058487 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/ONB/tessdata_best/ONB_1.195_300718_989100.traineddata", + "name": "ONB.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on Austrian National Library newspaper data", + "size": 4358948 + }, + { + "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/frak2021/tessdata_best/frak2021-0.905.traineddata", + "name": "frak2021.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model based on Austrian National Library newspaper data", + "size": 3421140 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/equ.traineddata", + "name": "equ.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for mathematical equations", + "size": 2251950 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata", + "name": "osd.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract legacy model for orientation and script detection", + "size": 10562727 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata", + "name": "eng.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) English", + "size": 4113088 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/deu.traineddata", + "name": "deu.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) German", + "size": 1525436 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/frk.traineddata", + "name": "frk.traineddata", + "parameter_usage": "without-extension", + "description": 
"Tesseract LSTM model for historical (Fraktur typesetting and letterpress printing) German", + "size": 6423052 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/script/Fraktur.traineddata", + "name": "Fraktur.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for historical Latin script with Fraktur typesetting", + "size": 10915632 + }, + { + "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/script/Latin.traineddata", + "name": "Latin.traineddata", + "parameter_usage": "without-extension", + "description": "Tesseract LSTM model for contemporary and historical Latin script", + "size": 89384811 + }, + { + "url": "https://github.com/tesseract-ocr/tesseract/archive/main.tar.gz", + "name": "configs", + "description": "Tesseract configs (parameter sets) for use with the standalone tesseract CLI", + "size": 1915529, + "type": "archive", + "path_in_archive": "tesseract-main/tessdata/configs" + } ] }, - "ocrd-skimage-normalize": { + "ocrd-tesserocr-segment": { + "executable": "ocrd-tesserocr-segment", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Equalize contrast/exposure of images with Scikit-image; stretches the color value/tone to the full dynamic range", - "executable": "ocrd-skimage-normalize", + "description": "Segment page into regions and lines with Tesseract", "input_file_grp": [ "OCR-D-IMG", "OCR-D-SEG-PAGE", - "OCR-D-SEG-REGION", - "OCR-D-SEG-LINE" + "OCR-D-GT-SEG-PAGE" ], "output_file_grp": [ - "OCR-D-IMG-NRM", - "OCR-D-SEG-PAGE-NRM", - "OCR-D-SEG-REGION-NRM", - "OCR-D-SEG-LINE-NRM" + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/region", + "layout/segmentation/line" ], "parameters": { - "black-point": { - "default": 1.0, - "description": "black point point in percent of luminance/value/tone histogram; up to ``black-point`` darkest pixels will be clipped to black when stretching", - "format": "float", - "type": "number" - }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "level-of-operation": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region", - "line", - "word", - "glyph" - ], - "type": "string" + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 4 }, - "method": { - "default": "stretch", - "description": "contrast-enhancing transformation to use after clipping; ``stretch`` uses ``skimage.exposure.rescale_intensity`` (globally linearly stretching to full dynamic range) and ``adapthist`` uses ``skimage.exposure.equalize_adapthist`` (applying over tiles with context from 1/8th of the image's width)", - "enum": [ - "stretch", - "adapthist" - ], - "type": "string" + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" }, - "white-point": { - "default": 7.0, - "description": "white point in percent of luminance/value/tone histogram; up to ``white-point`` brightest pixels will be clipped to white when stretching", - "format": "float", - "type": "number" + "block_polygons": { + "type": "boolean", + "default": false, 
+ "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." + }, + "sparse_text": { + "type": "boolean", + "default": false, + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" } - }, - "steps": [ - "preprocessing/optimization" - ] + } }, - "ocrd-tesserocr-binarize": { + "ocrd-tesserocr-segment-region": { + "executable": "ocrd-tesserocr-segment-region", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Binarize regions or lines with Tesseract's global Otsu", - "executable": "ocrd-tesserocr-binarize", + "description": "Segment page into regions with Tesseract", "input_file_grp": [ "OCR-D-IMG", - "OCR-D-SEG-BLOCK", - "OCR-D-SEG-LINE" + "OCR-D-SEG-PAGE", + "OCR-D-GT-SEG-PAGE" ], "output_file_grp": [ - "OCR-D-BIN-BLOCK", - "OCR-D-BIN-LINE" + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "operation_level": { - "default": "page", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region", - "line" - ], - "type": "string" + "overwrite_regions": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the Page level (otherwise skip page; no incremental annotation yet)." }, - "tiseg": { + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected region rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" + }, + "crop_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles" + }, + "find_tables": { + "type": "boolean", + "default": true, + "description": "recognise tables as table regions (textord_tabfind_find_tables)" + }, + "find_staves": { + "type": "boolean", + "default": false, + "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves." 
+ }, + "sparse_text": { + "type": "boolean", "default": false, - "description": "also separate text vs image by detecting and suppressing photo+sepline mask", - "type": "boolean" + "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space" } - }, - "steps": [ - "preprocessing/optimization/binarization" - ] + } }, - "ocrd-tesserocr-crop": { + "ocrd-tesserocr-segment-table": { + "executable": "ocrd-tesserocr-segment-table", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Poor man's cropping via region segmentation", - "executable": "ocrd-tesserocr-crop", + "description": "Segment table regions into cell text regions with Tesseract", "input_file_grp": [ - "OCR-D-IMG" + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" ], "output_file_grp": [ - "OCR-D-SEG-PAGE" + "OCR-D-SEG-BLOCK" + ], + "steps": [ + "layout/segmentation/region" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 + }, + "overwrite_cells": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TableRegion level (otherwise skip table; no incremental annotation yet)." }, "padding": { - "default": 4, - "description": "extend detected border by this many (true) pixels on every side", + "type": "number", "format": "integer", - "type": "number" + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" } - }, - "steps": [ - "preprocessing/optimization/cropping" - ] + } }, - "ocrd-tesserocr-deskew": { + "ocrd-tesserocr-segment-line": { + "executable": "ocrd-tesserocr-segment-line", "categories": [ - "Image preprocessing" + "Layout analysis" ], - "description": "Detect script, orientation and skew angle for pages or regions", - "executable": "ocrd-tesserocr-deskew", + "description": "Segment regions into lines with Tesseract", "input_file_grp": [ - "OCR-D-IMG", - "OCR-D-SEG-BLOCK" + "OCR-D-SEG-BLOCK", + "OCR-D-GT-SEG-BLOCK" ], "output_file_grp": [ - "OCR-D-DESKEW-BLOCK" + "OCR-D-SEG-LINE" + ], + "steps": [ + "layout/segmentation/line" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "min_orientation_confidence": { - "default": 1.5, - "description": "Minimum confidence score to apply orientation as detected by OSD", - "format": "float", - "type": "number" + "overwrite_lines": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextRegion level (otherwise skip region; no incremental annotation yet)." 
}, - "operation_level": { - "default": "region", - "description": "PAGE XML hierarchy level to operate on", - "enum": [ - "page", - "region", - "line" - ], - "type": "string" + "padding": { + "type": "number", + "format": "integer", + "description": "extend detected line rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" } - }, - "steps": [ - "preprocessing/optimization/deskewing" - ] + } }, - "ocrd-tesserocr-fontshape": { + "ocrd-tesserocr-segment-word": { + "executable": "ocrd-tesserocr-segment-word", "categories": [ - "Text recognition and optimization" + "Layout analysis" ], - "description": "Recognize font shapes (family/monospace/bold/italic) and size in segmented words with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons), annotating TextStyle", - "executable": "ocrd-tesserocr-fontshape", + "description": "Segment lines into words with Tesseract", "input_file_grp": [ - "OCR-D-SEG-WORD", - "OCR-D-OCR" + "OCR-D-SEG-LINE", + "OCR-D-GT-SEG-LINE" ], "output_file_grp": [ - "OCR-D-OCR-FONTSTYLE" + "OCR-D-SEG-WORD" + ], + "steps": [ + "layout/segmentation/word" ], "parameters": { "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "model": { - "content-type": "application/octet-stream", - "default": "osd", - "description": "tessdata model to apply (an ISO 639-3 language specification or some other basename, e.g. deu-frak or osd); must be an old (pre-LSTM) model", - "format": "uri", - "type": "string" + "overwrite_words": { + "type": "boolean", + "default": true, + "description": "Remove existing layout and text annotation below the TextLine level (otherwise skip line; no incremental annotation yet)." }, "padding": { - "default": 0, - "description": "Number of background-filled pixels to add around the word image (i.e. 
the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) on each side before recognition.", + "type": "number", "format": "integer", - "type": "number" + "description": "extend detected cell rectangles by this many (true) pixels", + "default": 0 + }, + "shrink_polygons": { + "type": "boolean", + "default": false, + "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols" } - }, - "steps": [ - "recognition/font-identification" - ] + } }, - "ocrd-tesserocr-recognize": { + "ocrd-tesserocr-crop": { + "executable": "ocrd-tesserocr-crop", "categories": [ - "Text recognition and optimization" + "Image preprocessing" ], - "description": "Segment and/or recognize text with Tesseract (using annotated derived images, or masking and cropping images from coordinate polygons) on any level of the PAGE hierarchy.", - "executable": "ocrd-tesserocr-recognize", + "description": "Poor man's cropping via region segmentation", "input_file_grp": [ - "OCR-D-SEG-PAGE", - "OCR-D-SEG-REGION", - "OCR-D-SEG-TABLE", - "OCR-D-SEG-LINE", - "OCR-D-SEG-WORD" + "OCR-D-IMG" ], "output_file_grp": [ - "OCR-D-SEG-REGION", - "OCR-D-SEG-TABLE", - "OCR-D-SEG-LINE", - "OCR-D-SEG-WORD", - "OCR-D-SEG-GLYPH", - "OCR-D-OCR-TESS" + "OCR-D-SEG-PAGE" + ], + "steps": [ + "preprocessing/optimization/cropping" ], "parameters": { - "auto_model": { - "default": false, - "description": "Prefer models performing best (by confidence) per segment (if multiple given in `model`). Repeats the OCR of the best model once (i.e. slower). (Use as a fallback to xpath_model if you do not trust script/language detection.)", - "type": "boolean" - }, - "block_polygons": { - "default": false, - "description": "When detecting regions, annotate polygon coordinates instead of bounding box rectangles by querying Tesseract accordingly.", - "type": "boolean" - }, - "char_blacklist": { - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to suppress; overruled by unblacklist if set.", - "type": "string" - }, - "char_unblacklist": { - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow inclusively.", - "type": "string" - }, - "char_whitelist": { - "default": "", - "description": "When recognizing text, enumeration of character hypotheses (from the model) to allow exclusively; overruled by blacklist if set.", - "type": "string" - }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" - }, - "find_staves": { - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves.", - "type": "boolean" - }, - "find_tables": { - "default": true, - "description": "When detecting regions, recognise tables as table regions (Tesseract's ``textord_tabfind_find_tables=1``).", - "type": "boolean" - }, - "model": { - "content-type": "application/octet-stream", - "description": "The tessdata text recognition model to apply (an ISO 639-3 language specification or some other basename, e.g. 
deu-frak or Fraktur).", - "format": "uri", - "type": "string" - }, - "oem": { - "default": "DEFAULT", - "description": "Tesseract OCR engine mode to use:\n* Run Tesseract only - fastest,\n* Run just the LSTM line recognizer. (>=v4.00),\n*Run the LSTM recognizer, but allow fallback to Tesseract when things get difficult. (>=v4.00),\n*Run both and combine results - best accuracy.", - "enum": [ - "TESSERACT_ONLY", - "LSTM_ONLY", - "TESSERACT_LSTM_COMBINED", - "DEFAULT" - ], - "type": "string" - }, - "overwrite_segments": { - "default": false, - "description": "If ``segmentation_level`` is not none, but an element already contains segments, remove them and segment again. Otherwise use the existing segments of that element.", - "type": "boolean" - }, - "overwrite_text": { - "default": true, - "description": "If ``textequiv_level`` is not none, but a segment already contains TextEquivs, remove them and replace with recognised text. Otherwise add new text as alternative. (Only the first entry is projected upwards.)", - "type": "boolean" + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, "padding": { - "default": 0, - "description": "Extend detected region/cell/line/word rectangles by this many (true) pixels, or extend existing region/line/word images (i.e. the annotated AlternativeImage if it exists or the higher-level image cropped to the bounding box and masked by the polygon otherwise) by this many (background/white) pixels on each side before recognition.", + "type": "number", "format": "integer", - "type": "number" - }, - "raw_lines": { - "default": false, - "description": "When detecting lines, do not attempt additional segmentation (baseline+xheight+ascenders/descenders prediction) on line images. Can increase accuracy for certain workflows. Disable when line segments/images may contain components of more than 1 line, or larger gaps/white-spaces.", - "type": "boolean" + "description": "extend detected border by this many (true) pixels on every side", + "default": 4 + } + } + }, + "ocrd-tesserocr-binarize": { + "executable": "ocrd-tesserocr-binarize", + "categories": [ + "Image preprocessing" + ], + "description": "Binarize regions or lines with Tesseract's global Otsu", + "input_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-BLOCK", + "OCR-D-SEG-LINE" + ], + "output_file_grp": [ + "OCR-D-BIN-BLOCK", + "OCR-D-BIN-LINE" + ], + "steps": [ + "preprocessing/optimization/binarization" + ], + "parameters": { + "dpi": { + "type": "number", + "format": "float", + "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "default": 0 }, - "segmentation_level": { - "default": "word", - "description": "Highest PAGE XML hierarchy level to remove existing annotation from and detect segments for (before iterating downwards); if ``none``, does not attempt any new segmentation; if ``cell``, starts at table regions, detecting text regions (cells). 
Ineffective when lower than ``textequiv_level``.", + "operation_level": { + "type": "string", "enum": [ + "page", "region", - "cell", - "line", - "word", - "glyph", - "none" + "line" ], - "type": "string" - }, - "shrink_polygons": { - "default": false, - "description": "When detecting any segments, annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols.", - "type": "boolean" + "default": "page", + "description": "PAGE XML hierarchy level to operate on" }, - "sparse_text": { + "tiseg": { + "type": "boolean", "default": false, - "description": "When detecting regions, use 'sparse text' page segmentation mode (finding as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space.", - "type": "boolean" - }, - "tesseract_parameters": { - "default": {}, - "description": "Dictionary of additional Tesseract runtime variables (cf. tesseract --print-parameters), string values.", - "type": "object" - }, - "textequiv_level": { - "default": "word", - "description": "Lowest PAGE XML hierarchy level to re-use or detect segments for and add the TextEquiv results to (before projecting upwards); if ``none``, adds segmentation down to the glyph level, but does not attempt recognition at all; if ``cell``, stops short before text lines, adding text of text regions inside tables (cells) or on page level only.", + "description": "also separate text vs image by detecting and suppressing photo+sepline mask" + } + } + }, + "ocrd-typegroups-classifier": { + "executable": "ocrd-typegroups-classifier", + "description": "Classification of 15th century type groups", + "categories": [ + "Text recognition and optimization" + ], + "steps": [ + "recognition/font-identification" + ], + "input_file_grp": [ + "OCR-D-IMG" + ], + "output_file_grp": [ + "OCR-D-IMG-FONTS" + ], + "parameters": { + "level": { + "description": "The PAGE-XML hierarchy level to operate on", + "type": "string", "enum": [ + "page", "region", - "cell", - "line", - "word", - "glyph", - "none" + "line" ], - "type": "string" + "default": "page" }, - "xpath_model": { - "default": {}, - "description": "Prefer models mapped according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) If no queries / mappings match (or under the default empty parameter), then fall back to `model`. If there are multiple matches, combine their results. (Example: {'starts-with(@script,\"Latn\")': 'Latin', 'starts-with(@script,\"Grek\")': 'Greek', '@language=\"Latin\"': 'lat', '@language=\"Greek\"': 'grc+ell', 'ancestor::TextRegion/@type=\"page-number\"': 'eng'})", - "type": "object" + "network": { + "description": "The file name of the neural network to use, including sufficient path information", + "type": "string", + "required": false }, - "xpath_parameters": { - "default": {}, - "description": "Set additional Tesseract runtime variables according to results of XPath queries into the segment. (As a convenience, `@language` and `@script` also match their upwards `@primary*` and `@secondary*` variants where applicable.) (Example: {'ancestor::TextRegion/@type=\"page-number\"': {'char_whitelist': '0123456789-'}, 'contains(@custom,\"ISBN\")': {'char_whitelist': '0123456789-'}})", - "type": "object" + "active_classes": { + "description": "Restrict the set of decoded font type groups to the given list; if empty, then allow all classes (i.e. 
antiqua/bastarda/fraktur/gotico_antiqua/greek/hebrew/italic/rotunda/schwabacher/textura/other_font/not_a_font)", + "type": "array", + "items": { + "type": "string" + }, + "default": [] + }, + "stride": { + "description": "Stride applied to the CNN on the image. Should be between 1 and 224. Smaller values increase the computation time.", + "type": "number", + "format": "integer", + "default": 112 } }, - "resource_locations": [ - "module" - ], "resources": [ { - "description": "Tesseract LSTM model trained on GT4HistOCR", - "name": "Fraktur_GT4HistOCR.traineddata", - "parameter_usage": "without-extension", - "size": 1058487, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/Fraktur_5000000/tessdata_fast/Fraktur_50000000.334_450937.traineddata" - }, - { - "description": "Tesseract LSTM model based on Austrian National Library newspaper data", - "name": "ONB.traineddata", - "parameter_usage": "without-extension", - "size": 4358948, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/ocrd-train/data/ONB/tessdata_best/ONB_1.195_300718_989100.traineddata" - }, - { - "description": "Tesseract LSTM model based on Austrian National Library newspaper data", - "name": "frak2021.traineddata", - "parameter_usage": "without-extension", - "size": 3421140, - "url": "https://ub-backup.bib.uni-mannheim.de/~stweil/tesstrain/frak2021/tessdata_best/frak2021-0.905.traineddata" - }, - { - "description": "Tesseract legacy model for mathematical equations", - "name": "equ.traineddata", - "parameter_usage": "without-extension", - "size": 2251950, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/equ.traineddata" - }, - { - "description": "Tesseract legacy model for orientation and script detection", - "name": "osd.traineddata", - "parameter_usage": "without-extension", - "size": 10562727, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/osd.traineddata" - }, - { - "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) English", - "name": "eng.traineddata", - "parameter_usage": "without-extension", - "size": 4113088, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata" - }, - { - "description": "Tesseract LSTM model for contemporary (computer typesetting and offset printing) German", - "name": "deu.traineddata", - "parameter_usage": "without-extension", - "size": 1525436, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/deu.traineddata" - }, - { - "description": "Tesseract LSTM model for historical (Fraktur typesetting and letterpress printing) German", - "name": "frk.traineddata", - "parameter_usage": "without-extension", - "size": 6423052, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/frk.traineddata" - }, - { - "description": "Tesseract LSTM model for historical Latin script with Fraktur typesetting", - "name": "Fraktur.traineddata", - "parameter_usage": "without-extension", - "size": 10915632, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/script/Fraktur.traineddata" + "url": "https://github.com/OCR-D/ocrd_typegroups_classifier/raw/master/ocrd_typegroups_classifier/models/densenet121.tgc", + "name": "densenet121.tgc", + "description": "Network to predict font families with. 
Bundled with standard installation.", + "size": 28509377 }, { - "description": "Tesseract LSTM model for contemporary and historical Latin script", - "name": "Latin.traineddata", - "parameter_usage": "without-extension", - "size": 89384811, - "url": "https://github.com/tesseract-ocr/tessdata_fast/raw/main/script/Latin.traineddata" - }, - { - "description": "Tesseract configs (parameter sets) for use with the standalone tesseract CLI", - "name": "configs", - "path_in_archive": "tesseract-main/tessdata/configs", - "size": 1915529, - "type": "archive", - "url": "https://github.com/tesseract-ocr/tesseract/archive/main.tar.gz" + "url": "https://github.com/OCR-D/ocrd_typegroups_classifier/raw/master/ocrd_typegroups_classifier/models/colwise_classifier.tgc", + "name": "colwise_classifier.tgc", + "description": "Network to predict font families with. Bundled with standard installation.", + "size": 186208 } - ], - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line", - "recognition/text-recognition" ] }, - "ocrd-tesserocr-segment": { + "ocrd-preprocess-image": { + "executable": "ocrd-preprocess-image", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Segment page into regions and lines with Tesseract", - "executable": "ocrd-tesserocr-segment", + "steps": [ + "preprocessing/optimization" + ], + "description": "Convert or enhance images", "input_file_grp": [ "OCR-D-IMG", "OCR-D-SEG-PAGE", - "OCR-D-GT-SEG-PAGE" + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" ], "output_file_grp": [ + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", "OCR-D-SEG-LINE" ], "parameters": { - "block_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles", - "type": "boolean" + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" }, - "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", - "format": "float", - "type": "number" + "input_feature_selector": { + "type": "string", + "default": "", + "description": "comma-separated list of required image features (e.g. binarized,despeckled)" }, - "find_staves": { - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). Note that this might wrongly detect tables as staves.", - "type": "boolean" + "input_feature_filter": { + "type": "string", + "default": "", + "description": "comma-separated list of forbidden image features (e.g. 
binarized,despeckled)" }, - "find_tables": { - "default": true, - "description": "recognise tables as table regions (textord_tabfind_find_tables)", - "type": "boolean" + "output_feature_added": { + "type": "string", + "required": true, + "description": "image feature(s) to be added after this operation (if multiple, separate by comma)" }, - "padding": { - "default": 4, - "description": "extend detected region rectangles by this many (true) pixels", - "format": "integer", - "type": "number" + "input_mimetype": { + "type": "string", + "default": "image/png", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "description": "File format to save input images to (tool's expected input)" }, - "shrink_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols", - "type": "boolean" + "output_mimetype": { + "type": "string", + "default": "image/png", + "enum": [ + "image/bmp", + "application/postscript", + "image/gif", + "image/jpeg", + "image/jp2", + "image/png", + "image/x-portable-pixmap", + "image/tiff" + ], + "description": "File format to load output images from (tool's expected output)" }, - "sparse_text": { - "default": false, - "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space", - "type": "boolean" + "command": { + "type": "string", + "required": true, + "description": "shell command to operate on image files, with @INFILE as place-holder for the input file path, and @OUTFILE as place-holder for the output file path" } - }, - "steps": [ - "layout/segmentation/region", - "layout/segmentation/line" - ] + } }, - "ocrd-tesserocr-segment-line": { + "ocrd-skimage-binarize": { + "executable": "ocrd-skimage-binarize", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Segment regions into lines with Tesseract", - "executable": "ocrd-tesserocr-segment-line", + "steps": [ + "preprocessing/optimization/binarization" + ], + "description": "Binarize images with Scikit-image", "input_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-GT-SEG-BLOCK" + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-SEG-LINE" + "OCR-D-IMG-BIN", + "OCR-D-SEG-PAGE-BIN", + "OCR-D-SEG-REGION-BIN", + "OCR-D-SEG-LINE-BIN" ], "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 }, - "overwrite_lines": { - "default": true, - "description": "Remove existing layout and text annotation below the TextRegion level (otherwise skip region; no incremental annotation yet).", - "type": "boolean" + "method": { + "type": "string", + "default": "sauvola", + "enum": [ + "sauvola", + "niblack", + "otsu", + "gauss", + "yen", + "li" + ], + "description": "Thresholding algorithm to use" }, - "padding": { - "default": 0, - "description": "extend detected line rectangles by 
this many (true) pixels", + "window_size": { + "type": "number", "format": "integer", - "type": "number" + "default": 0, + "description": "For Sauvola/Niblack/Gauss, the (odd) window size in pixels; when zero (default), set to DPI" }, - "shrink_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols", - "type": "boolean" + "k": { + "type": "number", + "format": "float", + "default": 0.34, + "description": "For Sauvola/Niblack, formula parameter influencing the threshold bias; larger is lighter foreground" } - }, - "steps": [ - "layout/segmentation/line" - ] + } }, - "ocrd-tesserocr-segment-region": { + "ocrd-skimage-denoise-raw": { + "executable": "ocrd-skimage-denoise-raw", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Segment page into regions with Tesseract", - "executable": "ocrd-tesserocr-segment-region", + "steps": [ + "preprocessing/optimization/despeckling" + ], + "description": "Denoise raw images with Scikit-image", "input_file_grp": [ "OCR-D-IMG", "OCR-D-SEG-PAGE", - "OCR-D-GT-SEG-PAGE" + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-IMG-DEN", + "OCR-D-SEG-PAGE-DEN", + "OCR-D-SEG-REGION-DEN", + "OCR-D-SEG-LINE-DEN" ], "parameters": { - "crop_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles", - "type": "boolean" + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" - }, - "find_staves": { - "default": false, - "description": "When detecting regions, recognize music staves as non-text, suppressing it in the binary image (Tesseract's ``pageseg_apply_music_mask``). 
Note that this might wrongly detect tables as staves.", - "type": "boolean" - }, - "find_tables": { - "default": true, - "description": "recognise tables as table regions (textord_tabfind_find_tables)", - "type": "boolean" - }, - "overwrite_regions": { - "default": true, - "description": "Remove existing layout and text annotation below the Page level (otherwise skip page; no incremental annotation yet).", - "type": "boolean" - }, - "padding": { - "default": 0, - "description": "extend detected region rectangles by this many (true) pixels", - "format": "integer", - "type": "number" - }, - "shrink_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols", - "type": "boolean" + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 }, - "sparse_text": { - "default": false, - "description": "use 'sparse text' page segmentation mode (find as much text as possible in no particular order): only text regions, single lines without vertical or horizontal space", - "type": "boolean" + "method": { + "type": "string", + "default": "VisuShrink", + "enum": [ + "BayesShrink", + "VisuShrink" + ], + "description": "Wavelet filtering scheme to use" } - }, - "steps": [ - "layout/segmentation/region" - ] + } }, - "ocrd-tesserocr-segment-table": { + "ocrd-skimage-denoise": { + "executable": "ocrd-skimage-denoise", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Segment table regions into cell text regions with Tesseract", - "executable": "ocrd-tesserocr-segment-table", + "steps": [ + "preprocessing/optimization/despeckling" + ], + "description": "Denoise binarized images with Scikit-image", "input_file_grp": [ - "OCR-D-SEG-BLOCK", - "OCR-D-GT-SEG-BLOCK" + "OCR-D-IMG-BIN", + "OCR-D-SEG-PAGE-BIN", + "OCR-D-SEG-REGION-BIN", + "OCR-D-SEG-LINE-BIN" ], "output_file_grp": [ - "OCR-D-SEG-BLOCK" + "OCR-D-IMG-DEN", + "OCR-D-SEG-PAGE-DEN", + "OCR-D-SEG-REGION-DEN", + "OCR-D-SEG-LINE-DEN" ], "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" - }, - "overwrite_cells": { - "default": true, - "description": "Remove existing layout and text annotation below the TableRegion level (otherwise skip table; no incremental annotation yet).", - "type": "boolean" + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 }, - "padding": { - "default": 0, - "description": "extend detected cell rectangles by this many (true) pixels", - "format": "integer", - "type": "number" + "protect": { + "type": "number", + "format": "float", + "default": 0.0, + "description": "avoid removing fg specks near larger fg components by up to this distance in pt" }, - "shrink_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols", - "type": "boolean" + "maxsize": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "maximum component size of (bg holes or fg specks) noise in pt" } - }, - "steps": [ - "layout/segmentation/region" 
- ] + } }, - "ocrd-tesserocr-segment-word": { + "ocrd-skimage-normalize": { + "executable": "ocrd-skimage-normalize", "categories": [ - "Layout analysis" + "Image preprocessing" ], - "description": "Segment lines into words with Tesseract", - "executable": "ocrd-tesserocr-segment-word", + "steps": [ + "preprocessing/optimization" + ], + "description": "Equalize contrast/exposure of images with Scikit-image; stretches the color value/tone to the full dynamic range", "input_file_grp": [ - "OCR-D-SEG-LINE", - "OCR-D-GT-SEG-LINE" + "OCR-D-IMG", + "OCR-D-SEG-PAGE", + "OCR-D-SEG-REGION", + "OCR-D-SEG-LINE" ], "output_file_grp": [ - "OCR-D-SEG-WORD" + "OCR-D-IMG-NRM", + "OCR-D-SEG-PAGE-NRM", + "OCR-D-SEG-REGION-NRM", + "OCR-D-SEG-LINE-NRM" ], "parameters": { + "level-of-operation": { + "type": "string", + "enum": [ + "page", + "region", + "line", + "word", + "glyph" + ], + "default": "page", + "description": "PAGE XML hierarchy level to operate on" + }, "dpi": { - "default": 0, - "description": "pixel density in dots per inch (overrides any meta-data in the images)", + "type": "number", "format": "float", - "type": "number" + "description": "pixel density in dots per inch (overrides any meta-data in the images); disabled when zero", + "default": 0 }, - "overwrite_words": { - "default": true, - "description": "Remove existing layout and text annotation below the TextLine level (otherwise skip line; no incremental annotation yet).", - "type": "boolean" + "black-point": { + "type": "number", + "format": "float", + "default": 1.0, + "description": "black point point in percent of luminance/value/tone histogram; up to ``black-point`` darkest pixels will be clipped to black when stretching" }, - "padding": { - "default": 0, - "description": "extend detected cell rectangles by this many (true) pixels", - "format": "integer", - "type": "number" + "white-point": { + "type": "number", + "format": "float", + "default": 7.0, + "description": "white point in percent of luminance/value/tone histogram; up to ``white-point`` brightest pixels will be clipped to white when stretching" }, - "shrink_polygons": { - "default": false, - "description": "annotate polygon coordinates instead of bounding box rectangles by projecting the convex hull of all symbols", - "type": "boolean" + "method": { + "type": "string", + "default": "stretch", + "enum": [ + "stretch", + "adapthist" + ], + "description": "contrast-enhancing transformation to use after clipping; ``stretch`` uses ``skimage.exposure.rescale_intensity`` (globally linearly stretching to full dynamic range) and ``adapthist`` uses ``skimage.exposure.equalize_adapthist`` (applying over tiles with context from 1/8th of the image's width)" } - }, - "steps": [ - "layout/segmentation/word" - ] + } }, - "ocrd-typegroups-classifier": { + "ocrd-sbb-binarize": { + "executable": "ocrd-sbb-binarize", + "description": "Pixelwise binarization with selectional auto-encoders in Keras", "categories": [ - "Text recognition and optimization" - ], - "description": "Classification of 15th century type groups", - "executable": "ocrd-typegroups-classifier", - "input_file_grp": [ - "OCR-D-IMG" + "Image preprocessing" ], - "output_file_grp": [ - "OCR-D-IMG-FONTS" + "steps": [ + "preprocessing/optimization/binarization" ], + "input_file_grp": [], + "output_file_grp": [], "parameters": { - "active_classes": { - "default": [], - "description": "Restrict the set of decoded font type groups to the given list; if empty, then allow all classes (i.e. 
antiqua/bastarda/fraktur/gotico_antiqua/greek/hebrew/italic/rotunda/schwabacher/textura/other_font/not_a_font)", - "items": { - "type": "string" - }, - "type": "array" - }, - "level": { - "default": "page", - "description": "The PAGE-XML hierarchy level to operate on", + "operation_level": { + "type": "string", "enum": [ "page", "region" ], - "type": "string" - }, - "network": { - "description": "The file name of the neural network to use, including sufficient path information", - "required": false, - "type": "string" + "default": "page", + "description": "PAGE XML hierarchy level to operate on" }, - "stride": { - "default": 112, - "description": "Stride applied to the CNN on the image. Should be between 1 and 224. Smaller values increase the computation time.", - "format": "integer", - "type": "number" + "model": { + "description": "Directory containing HDF5 or SavedModel/ProtoBuf models. Can be an absolute path or a path relative to the OCR-D resource location, the current working directory or the $SBB_BINARIZE_DATA environment variable (if set)", + "type": "string", + "format": "uri", + "content-type": "text/directory", + "required": true } }, - "steps": [ - "recognition/font-identification" + "resources": [ + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2020_01_16.zip", + "name": "default", + "type": "archive", + "path_in_archive": "saved_model_2020_01_16", + "size": 563147331, + "description": "default models provided by github.com/qurator-spk (SavedModel format)" + }, + { + "url": "https://github.com/qurator-spk/sbb_binarization/releases/download/v0.0.11/saved_model_2021_03_09.zip", + "name": "default-2021-03-09", + "type": "archive", + "path_in_archive": ".", + "size": 133230419, + "description": "updated default models provided by github.com/qurator-spk (SavedModel format)" + } ] + }, + "ocrd-page-transform": { + "executable": "ocrd-page-transform", + "description": "apply arbitrary XSL transformation file for PAGE-XML", + "parameters": { + "xsl": { + "description": "File path of the XSL transformation script", + "type": "string", + "format": "uri", + "content-type": "text/xsl", + "required": true + }, + "xslt-params": { + "description": "Assignment of XSL transformation parameter values, given as in `xmlstarlet` (which differentiates between `-s name=value` for literal `value` and `-p name=value` for XPath expression `value`), white-space separated.", + "type": "string", + "default": "" + }, + "pretty-print": { + "description": "Reformat with line breaks and this many spaces of indentation after XSL transformation (unless zero).", + "type": "number", + "format": "integer", + "default": 0 + }, + "mimetype": { + "description": "MIME type to register the output files under (should correspond to `xsl` result)", + "type": "string", + "default": "application/vnd.prima.page+xml" + } + } } } diff --git a/docs/ocrd-events.json b/docs/ocrd-events.json index e1f924d9b..91f88fae2 100644 --- a/docs/ocrd-events.json +++ b/docs/ocrd-events.json @@ -81,21 +81,5 @@ "dtstart": "2021-11-25T09:00:00", "tzid": "Europe/Berlin" } - }, - { - "title": "vBIB21", - "color": "black", - "rrule": { - "dtstart": "2021-12-01T10:00:00", - "tzid": "Europe/Berlin" - } - }, - { - "title": "vBIB21", - "color": "black", - "rrule": { - "dtstart": "2021-12-02T10:00:00", - "tzid": "Europe/Berlin" - } } ] diff --git a/repo/ocr-d.github.io b/repo/ocr-d.github.io index 17f52c645..8e9d01049 160000 --- a/repo/ocr-d.github.io +++ b/repo/ocr-d.github.io @@ -1 +1 @@ 
-Subproject commit 17f52c645915f33bbf4aae68b06a9264b291bb60 +Subproject commit 8e9d010490533cff11b1d14dbfeb1d04d73d58fe
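
For orientation: the entries added to docs/js/ocrd-all-tool.json above follow the ocrd-tool.json descriptor layout (executable, categories, steps, description, input/output file groups, parameters, optional resources). A minimal sketch of how such a descriptor file could be inspected programmatically is given below; the hard-coded path and the choice of fields to print are illustrative assumptions, not part of this change.

#!/usr/bin/env python3
# Sketch: list every processor in the aggregated tool descriptor together
# with its processing steps and any parameters marked as required.
# Assumption: executed from the repository root, so the path below points
# at the file modified in this diff.
import json

with open("docs/js/ocrd-all-tool.json", encoding="utf-8") as f:
    tools = json.load(f)

for name, tool in sorted(tools.items()):
    steps = ", ".join(tool.get("steps", []))
    required = [
        param
        for param, spec in tool.get("parameters", {}).items()
        if spec.get("required", False)
    ]
    print(f"{name}  [{steps}]")
    if required:
        print("  required parameters: " + ", ".join(required))

Run against the state after this commit, such a listing would show, for example, that ocrd-tesserocr-recognize now exposes segmentation_level and textequiv_level, and that ocrd-sbb-binarize (model), ocrd-page-transform (xsl), ocrd-cor-asv-ann-process (model_file) and ocrd-preprocess-image (command, output_feature_added) declare required parameters, matching the entries above.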