From 18ffce8f106611320fee610aad4f2c20e877862a Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 12:38:48 +0100 Subject: [PATCH 01/29] hot_fix: missing mets server socket place holder in NF scripts --- .../default_workflow_with_MS.nf | 16 +++++++-------- .../odem_workflow_with_MS.nf | 20 +++++++++---------- .../sbb_workflow_with_MS.nf | 2 +- .../template_workflow_with_MS.nf | 2 +- .../operandi_utils/oton/nf_block_process.py | 9 +++++++-- .../operandi_utils/oton/nf_file_executable.py | 19 +++++++++++++----- .../oton/process_call_arguments.py | 7 ++++--- ...test_output_nextflow1_apptainer_with_MS.nf | 16 +++++++-------- .../test_output_nextflow1_docker_with_MS.nf | 16 +++++++-------- .../test_output_nextflow1_local_with_MS.nf | 16 +++++++-------- tests/tests_utils/test_2_oton/assert_utils.py | 2 ++ .../test_2_oton/test_4_nextflow_process.py | 2 +- 12 files changed, 72 insertions(+), 55 deletions(-) diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf index 42198e02..8abb7aa7 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf @@ -86,7 +86,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -110,7 +110,7 @@ process ocrd_anybaseocr_crop_1 { script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -134,7 +134,7 @@ process ocrd_skimage_binarize_2 { script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -158,7 +158,7 @@ process ocrd_skimage_denoise_3 { script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -182,7 +182,7 @@ process ocrd_tesserocr_deskew_4 { script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -206,7 +206,7 @@ process ocrd_cis_ocropy_segment_5 { script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -230,7 +230,7 @@ process ocrd_cis_ocropy_dewarp_6 { script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -254,7 +254,7 @@ process ocrd_calamari_recognize_7 { script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf index 07ba1866..cadaa38d 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf @@ -90,7 +90,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -114,7 +114,7 @@ process ocrd_anybaseocr_crop_1 { script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -138,7 +138,7 @@ process ocrd_cis_ocropy_denoise_2 { script: """ - ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -162,7 +162,7 @@ process ocrd_cis_ocropy_deskew_3 { script: """ - ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -186,7 +186,7 @@ process ocrd_tesserocr_segment_region_4 { script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' """ } @@ -210,7 +210,7 @@ process ocrd_segment_repair_5 { script: """ - ${params.env_wrapper_cmd_step5} ocrd-segment-repair -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' + ${params.env_wrapper_cmd_step5} ocrd-segment-repair -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' """ } @@ -234,7 +234,7 @@ process ocrd_cis_ocropy_clip_6 { script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -258,7 +258,7 @@ process ocrd_cis_ocropy_segment_7 { script: """ - ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -282,7 +282,7 @@ process ocrd_cis_ocropy_dewarp_8 { script: """ - ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -306,7 +306,7 @@ process ocrd_tesserocr_recognize_9 { script: """ - ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' + ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' """ } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf index 699b5805..12a2d9dd 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf @@ -72,7 +72,7 @@ process ocrd_tesserocr_recognize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' + ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' """ } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf index e95640ed..7d10fdca 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf @@ -72,7 +72,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } diff --git a/src/utils/operandi_utils/oton/nf_block_process.py b/src/utils/operandi_utils/oton/nf_block_process.py index 2f1cb832..29c114b9 100644 --- a/src/utils/operandi_utils/oton/nf_block_process.py +++ b/src/utils/operandi_utils/oton/nf_block_process.py @@ -4,7 +4,12 @@ class NextflowBlockProcess: - def __init__(self, processor_call_arguments: ProcessorCallArguments, index_pos: int, env_wrapper: bool = False): + def __init__( + self, processor_call_arguments: ProcessorCallArguments, + index_pos: int, + with_mets_server: bool, + env_wrapper: bool = False + ): self.logger = getLogger(__name__) self.logger.setLevel(getLevelName(OTON_LOG_LEVEL)) self.index_pos = str(index_pos) @@ -17,7 +22,7 @@ def __init__(self, processor_call_arguments: ProcessorCallArguments, index_pos: self.output_params = {} self.script = "" self.ocrd_command_bash = processor_call_arguments.dump_bash_form() - self.ocrd_command_bash_placeholders = processor_call_arguments.dump_bash_form_with_placeholders() + self.ocrd_command_bash_placeholders = processor_call_arguments.dump_bash_form_with_phs(with_mets_server) def add_directive(self, directive: str, value: str): if directive in self.directives: diff --git a/src/utils/operandi_utils/oton/nf_file_executable.py b/src/utils/operandi_utils/oton/nf_file_executable.py index 57d8d31c..adc26f5b 100644 --- a/src/utils/operandi_utils/oton/nf_file_executable.py +++ b/src/utils/operandi_utils/oton/nf_file_executable.py @@ -65,7 +65,11 @@ def build_parameters(self, environment: str, with_mets_server: bool): # TODO: Refactor later def build_split_page_ranges_process(self, environment: str, with_mets_server: bool) -> NextflowBlockProcess: - block = NextflowBlockProcess(ProcessorCallArguments(executable="split-page-ranges"), 0) + block = NextflowBlockProcess( + ProcessorCallArguments(executable="split-page-ranges"), + index_pos=0, + with_mets_server=with_mets_server + ) block.nf_process_name = "split_page_ranges" block.ocrd_command_bash = "" block.ocrd_command_bash_placeholders = "" @@ -110,8 +114,12 @@ def build_split_page_ranges_process(self, environment: str, with_mets_server: bo return block # TODO: Refactor later - def build_merge_mets_process(self, environment: str) -> NextflowBlockProcess: - block = NextflowBlockProcess(ProcessorCallArguments(executable="merging-mets"), 0) + def build_merge_mets_process(self, environment: str, with_mets_server: bool) -> NextflowBlockProcess: + block = NextflowBlockProcess( + ProcessorCallArguments(executable="merging-mets"), + index_pos=0, + with_mets_server=with_mets_server + ) block.nf_process_name = "merging_mets" block.ocrd_command_bash = "" block.ocrd_command_bash_placeholders = "" @@ -151,9 +159,10 @@ def build_nextflow_processes( index = 0 env_wrapper = True if environment == "docker" or environment == "apptainer" else False self.build_split_page_ranges_process(environment=environment, with_mets_server=with_mets_server) - self.build_merge_mets_process(environment=environment) + self.build_merge_mets_process(environment=environment, with_mets_server=with_mets_server) for processor in ocrd_processors: - nf_process_block = NextflowBlockProcess(processor, index, env_wrapper=env_wrapper) + nf_process_block = NextflowBlockProcess( + processor, index, with_mets_server=with_mets_server, env_wrapper=env_wrapper) # Add Nextflow process directives nf_process_block.add_directive(directive='debug', value='true') diff --git a/src/utils/operandi_utils/oton/process_call_arguments.py b/src/utils/operandi_utils/oton/process_call_arguments.py index def5aa47..ba9e5bd1 100644 --- a/src/utils/operandi_utils/oton/process_call_arguments.py +++ b/src/utils/operandi_utils/oton/process_call_arguments.py @@ -51,14 +51,15 @@ def dump_bash_form(self) -> str: dump += f" -p '{json_dumps(self.parameters)}'" return dump - def dump_bash_form_with_placeholders(self): + def dump_bash_form_with_phs(self, with_mets_socket: bool, with_page_id: bool = True): dump = '' dump += f'{self.executable}' - if self.mets_socket_path: + if with_mets_socket: dump += f' -U ${BS[0]}{CONST_METS_SOCKET_PATH}{BS[1]}' dump += f' -w ${BS[0]}{CONST_WORKSPACE_DIR}{BS[1]}' dump += f' -m ${BS[0]}{CONST_METS_PATH}{BS[1]}' - dump += f' --page-id ${BS[0]}{CONST_PAGE_RANGE}{BS[1]}' + if with_page_id: + dump += f' --page-id ${BS[0]}{CONST_PAGE_RANGE}{BS[1]}' dump += f' -I ${BS[0]}{CONST_DIR_IN}{BS[1]}' dump += f' -O ${BS[0]}{CONST_DIR_OUT}{BS[1]}' if self.parameters: diff --git a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf index 42198e02..8abb7aa7 100644 --- a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf @@ -86,7 +86,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -110,7 +110,7 @@ process ocrd_anybaseocr_crop_1 { script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -134,7 +134,7 @@ process ocrd_skimage_binarize_2 { script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -158,7 +158,7 @@ process ocrd_skimage_denoise_3 { script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -182,7 +182,7 @@ process ocrd_tesserocr_deskew_4 { script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -206,7 +206,7 @@ process ocrd_cis_ocropy_segment_5 { script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -230,7 +230,7 @@ process ocrd_cis_ocropy_dewarp_6 { script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -254,7 +254,7 @@ process ocrd_calamari_recognize_7 { script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } diff --git a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf index 49f0940e..be421dc3 100644 --- a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf @@ -74,7 +74,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -96,7 +96,7 @@ process ocrd_anybaseocr_crop_1 { script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -118,7 +118,7 @@ process ocrd_skimage_binarize_2 { script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -140,7 +140,7 @@ process ocrd_skimage_denoise_3 { script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -162,7 +162,7 @@ process ocrd_tesserocr_deskew_4 { script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -184,7 +184,7 @@ process ocrd_cis_ocropy_segment_5 { script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -206,7 +206,7 @@ process ocrd_cis_ocropy_dewarp_6 { script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -228,7 +228,7 @@ process ocrd_calamari_recognize_7 { script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } diff --git a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf index 29853558..e040aea1 100644 --- a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf @@ -72,7 +72,7 @@ process ocrd_cis_ocropy_binarize_0 { script: """ - ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -94,7 +94,7 @@ process ocrd_anybaseocr_crop_1 { script: """ - ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -116,7 +116,7 @@ process ocrd_skimage_binarize_2 { script: """ - ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -138,7 +138,7 @@ process ocrd_skimage_denoise_3 { script: """ - ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -160,7 +160,7 @@ process ocrd_tesserocr_deskew_4 { script: """ - ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -182,7 +182,7 @@ process ocrd_cis_ocropy_segment_5 { script: """ - ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -204,7 +204,7 @@ process ocrd_cis_ocropy_dewarp_6 { script: """ - ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -226,7 +226,7 @@ process ocrd_calamari_recognize_7 { script: """ - ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } diff --git a/tests/tests_utils/test_2_oton/assert_utils.py b/tests/tests_utils/test_2_oton/assert_utils.py index 11d334b7..f2e875fb 100644 --- a/tests/tests_utils/test_2_oton/assert_utils.py +++ b/tests/tests_utils/test_2_oton/assert_utils.py @@ -18,6 +18,8 @@ def assert_common_features( assert len(blocks_process) == num_blocks_process for block in blocks_process: assert "ocrd-" in block.ocrd_command_bash_placeholders + if with_mets_server: + assert "-U" in block.ocrd_command_bash_placeholders blocks_workflows = nextflow_file_class.nf_blocks_workflow assert len(blocks_workflows) == num_blocks_workflow for block in blocks_workflows: diff --git a/tests/tests_utils/test_2_oton/test_4_nextflow_process.py b/tests/tests_utils/test_2_oton/test_4_nextflow_process.py index fa7b3302..3c9a4c91 100644 --- a/tests/tests_utils/test_2_oton/test_4_nextflow_process.py +++ b/tests/tests_utils/test_2_oton/test_4_nextflow_process.py @@ -13,7 +13,7 @@ def test_line_append(): result = [] for ocrd_command in list_processor_call_arguments: index_pos = list_processor_call_arguments.index(ocrd_command) - nextflow_process = NextflowBlockProcess(ocrd_command, index_pos, env_wrapper=False) + nextflow_process = NextflowBlockProcess(ocrd_command, index_pos, with_mets_server=False, env_wrapper=False) result.append(nextflow_process.nf_process_name) expected = [ From 60ca64e830a8825f4feb40247f423331edfb41eb Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 13:22:21 +0100 Subject: [PATCH 02/29] fix: NF mets socket path param --- src/utils/operandi_utils/hpc/nhr_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/operandi_utils/hpc/nhr_executor.py b/src/utils/operandi_utils/hpc/nhr_executor.py index 1245f159..c1bd4aa0 100644 --- a/src/utils/operandi_utils/hpc/nhr_executor.py +++ b/src/utils/operandi_utils/hpc/nhr_executor.py @@ -198,7 +198,7 @@ def cmd_nextflow_run( nf_run_command += f" --input_file_group {input_file_grp}" nf_run_command += f" --mets_path /ws_data/{mets_basename}" if use_mets_server: - nf_run_command += f" --mets_socket /ws_data/mets_server.sock" + nf_run_command += f" --mets_socket_path /ws_data/mets_server.sock" nf_run_command += f" --workspace_dir /ws_data" nf_run_command += f" --pages {ws_pages_amount}" From de57eefb80470f9caf68b118d279811f8cf8fdac Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 13:48:42 +0100 Subject: [PATCH 03/29] make RabbitMQ connection more resilient --- src/utils/operandi_utils/rabbitmq/connector.py | 11 +++++++---- src/utils/operandi_utils/rabbitmq/constants.py | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/utils/operandi_utils/rabbitmq/connector.py b/src/utils/operandi_utils/rabbitmq/connector.py index b1de054a..d8379309 100644 --- a/src/utils/operandi_utils/rabbitmq/connector.py +++ b/src/utils/operandi_utils/rabbitmq/connector.py @@ -3,7 +3,10 @@ from pika import BasicProperties, BlockingConnection, ConnectionParameters, PlainCredentials from pika.adapters.blocking_connection import BlockingChannel -from .constants import DEFAULT_EXCHANGER_NAME, DEFAULT_EXCHANGER_TYPE, PREFETCH_COUNT, RABBITMQ_QUEUE_DEFAULT +from .constants import ( + DEFAULT_EXCHANGER_NAME, DEFAULT_EXCHANGER_TYPE, HEARTBEAT, PREFETCH_COUNT, RABBITMQ_QUEUE_DEFAULT, RECONNECT_TRIES, + RECONNECT_WAIT +) class RMQConnector: @@ -36,12 +39,12 @@ def declare_and_bind_defaults(connection: BlockingConnection, channel: BlockingC channel, queue_name=RABBITMQ_QUEUE_DEFAULT, exchange_name=DEFAULT_EXCHANGER_NAME, routing_key=RABBITMQ_QUEUE_DEFAULT) # Bind the default queue to the default exchange - # Connection related methods @staticmethod def open_blocking_connection(credentials: PlainCredentials, host: str, port: int, vhost: str) -> BlockingConnection: - # TODO: The heartbeat should not be disabled (0)! connection_params = ConnectionParameters( - host=host, port=port, virtual_host=vhost, credentials=credentials, heartbeat=0) + host=host, port=port, virtual_host=vhost, credentials=credentials, heartbeat=HEARTBEAT, + connection_attempts=RECONNECT_TRIES, retry_delay=RECONNECT_WAIT + ) return BlockingConnection(parameters=connection_params) @staticmethod diff --git a/src/utils/operandi_utils/rabbitmq/constants.py b/src/utils/operandi_utils/rabbitmq/constants.py index 237486e7..e260ca39 100644 --- a/src/utils/operandi_utils/rabbitmq/constants.py +++ b/src/utils/operandi_utils/rabbitmq/constants.py @@ -5,10 +5,12 @@ RABBITMQ_QUEUE_JOB_STATUSES: str = "operandi_queue_job_statuses" RABBITMQ_QUEUE_USERS: str = "operandi_queue_users" +# Defines how often the RabbitMQ broker will send requests to the clients to verify their live state +HEARTBEAT: int = 60 # Wait seconds before next reconnect try -RECONNECT_WAIT: int = 5 +RECONNECT_WAIT: int = 3 # Reconnect tries before timeout -RECONNECT_TRIES: int = 3 +RECONNECT_TRIES: int = 100 # QOS, i.e., how many messages to consume in a single go # Check here: https://www.rabbitmq.com/consumer-prefetch.html PREFETCH_COUNT: int = 1 From 52603f638f3cff0c9273ea260ca3c39ff209dd97 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 13:49:36 +0100 Subject: [PATCH 04/29] remove: no longer needed sleep in the ci cd --- .github/workflows/ci_cd.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 555f603d..91b7a92d 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -120,9 +120,6 @@ jobs: - name: start MongoDB run: docker compose -f ./docker-compose.yml --env-file ${{ env.ENV_FILE }} up -d operandi-mongodb - - name: wait starting of RabbitMQ and MongoDB - run: sleep 45 - - name: run utils tests run: | export $(shell sed 's/=.*//' ${{ env.ENV_FILE }}) From 07d276b53162644af0e273350ec8932b40adc443 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 14:10:06 +0100 Subject: [PATCH 05/29] try to fix: docker services in ci cd --- .github/workflows/ci_cd.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index 91b7a92d..7ca2c5d3 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -114,6 +114,9 @@ jobs: echo "$SSH_KEY_HPC" > /home/runner/.ssh/key_hpc chmod 600 /home/runner/.ssh/key_hpc + - name: stop RabbitMQ and MongoDB instances if still running + run: docker compose -f ./docker-compose.yml --env-file ${{ env.ENV_FILE }} down --remove-orphans + - name: start RabbitMQ Server run: docker compose -f ./docker-compose.yml --env-file ${{ env.ENV_FILE }} up -d operandi-rabbitmq From 52d718e887436c02c1cf8128bfaedb0e556444d9 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 14:59:46 +0100 Subject: [PATCH 06/29] fix: nf workflows with mets server --- .../nextflow_workflows/default_workflow.nf | 48 ++++-------- .../default_workflow_with_MS.nf | 48 ++++-------- .../hpc/nextflow_workflows/odem_workflow.nf | 60 +++++--------- .../odem_workflow_with_MS.nf | 60 +++++--------- .../hpc/nextflow_workflows/sbb_workflow.nf | 6 +- .../sbb_workflow_with_MS.nf | 6 +- .../nextflow_workflows/template_workflow.nf | 6 +- .../template_workflow_with_MS.nf | 6 +- .../operandi_utils/oton/nf_block_workflow.py | 8 +- .../operandi_utils/oton/nf_file_executable.py | 22 +----- .../oton/process_call_arguments.py | 8 +- tests/assets/oton/constants.py | 78 +++++++++---------- .../oton/test_output_nextflow1_apptainer.nf | 48 ++++-------- ...test_output_nextflow1_apptainer_with_MS.nf | 48 ++++-------- .../oton/test_output_nextflow1_docker.nf | 48 ++++-------- .../test_output_nextflow1_docker_with_MS.nf | 48 ++++-------- .../oton/test_output_nextflow1_local.nf | 48 ++++-------- .../test_output_nextflow1_local_with_MS.nf | 48 ++++-------- tests/assets/oton/test_output_nextflow2.nf | 42 ++++------ tests/assets/oton/test_output_nextflow3.nf | 18 ++--- tests/assets/oton/test_output_nextflow4.nf | 78 +++++++------------ 21 files changed, 272 insertions(+), 510 deletions(-) diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf index d3ddb39b..4e9c0699 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -243,18 +229,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -279,13 +263,13 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1]) } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf index 8abb7aa7..ba9bf528 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -243,18 +229,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -262,12 +246,12 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf index c9589b8a..a40c8b33 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf @@ -79,18 +79,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -103,18 +101,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -127,18 +123,16 @@ process ocrd_cis_ocropy_denoise_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -151,18 +145,16 @@ process ocrd_cis_ocropy_deskew_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -175,18 +167,16 @@ process ocrd_tesserocr_segment_region_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' """ } @@ -199,18 +189,16 @@ process ocrd_segment_repair_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-segment-repair -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' + ${params.env_wrapper_cmd_step5} ocrd-segment-repair -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' """ } @@ -223,18 +211,16 @@ process ocrd_cis_ocropy_clip_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -247,18 +233,16 @@ process ocrd_cis_ocropy_segment_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -271,18 +255,16 @@ process ocrd_cis_ocropy_dewarp_8 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -295,18 +277,16 @@ process ocrd_tesserocr_recognize_9 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' + ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' """ } @@ -331,15 +311,15 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BINPAGE") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BINPAGE", "OCR-D-SEG-PAGE-ANYOCR") - ocrd_cis_ocropy_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-SEG-PAGE-ANYOCR", "OCR-D-DENOISE-OCROPY") - ocrd_cis_ocropy_deskew_3(ocrd_cis_ocropy_denoise_2.out[0], ocrd_cis_ocropy_denoise_2.out[1], ocrd_cis_ocropy_denoise_2.out[2], "OCR-D-DENOISE-OCROPY", "OCR-D-DESKEW-OCROPY") - ocrd_tesserocr_segment_region_4(ocrd_cis_ocropy_deskew_3.out[0], ocrd_cis_ocropy_deskew_3.out[1], ocrd_cis_ocropy_deskew_3.out[2], "OCR-D-DESKEW-OCROPY", "OCR-D-SEG-BLOCK-TESSERACT") - ocrd_segment_repair_5(ocrd_tesserocr_segment_region_4.out[0], ocrd_tesserocr_segment_region_4.out[1], ocrd_tesserocr_segment_region_4.out[2], "OCR-D-SEG-BLOCK-TESSERACT", "OCR-D-SEGMENT-REPAIR") - ocrd_cis_ocropy_clip_6(ocrd_segment_repair_5.out[0], ocrd_segment_repair_5.out[1], ocrd_segment_repair_5.out[2], "OCR-D-SEGMENT-REPAIR", "OCR-D-CLIP") - ocrd_cis_ocropy_segment_7(ocrd_cis_ocropy_clip_6.out[0], ocrd_cis_ocropy_clip_6.out[1], ocrd_cis_ocropy_clip_6.out[2], "OCR-D-CLIP", "OCR-D-SEGMENT-OCROPY") - ocrd_cis_ocropy_dewarp_8(ocrd_cis_ocropy_segment_7.out[0], ocrd_cis_ocropy_segment_7.out[1], ocrd_cis_ocropy_segment_7.out[2], "OCR-D-SEGMENT-OCROPY", "OCR-D-DEWARP") - ocrd_tesserocr_recognize_9(ocrd_cis_ocropy_dewarp_8.out[0], ocrd_cis_ocropy_dewarp_8.out[1], ocrd_cis_ocropy_dewarp_8.out[2], "OCR-D-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BINPAGE") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BINPAGE", "OCR-D-SEG-PAGE-ANYOCR") + ocrd_cis_ocropy_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-SEG-PAGE-ANYOCR", "OCR-D-DENOISE-OCROPY") + ocrd_cis_ocropy_deskew_3(ocrd_cis_ocropy_denoise_2.out[0], ocrd_cis_ocropy_denoise_2.out[1], "OCR-D-DENOISE-OCROPY", "OCR-D-DESKEW-OCROPY") + ocrd_tesserocr_segment_region_4(ocrd_cis_ocropy_deskew_3.out[0], ocrd_cis_ocropy_deskew_3.out[1], "OCR-D-DESKEW-OCROPY", "OCR-D-SEG-BLOCK-TESSERACT") + ocrd_segment_repair_5(ocrd_tesserocr_segment_region_4.out[0], ocrd_tesserocr_segment_region_4.out[1], "OCR-D-SEG-BLOCK-TESSERACT", "OCR-D-SEGMENT-REPAIR") + ocrd_cis_ocropy_clip_6(ocrd_segment_repair_5.out[0], ocrd_segment_repair_5.out[1], "OCR-D-SEGMENT-REPAIR", "OCR-D-CLIP") + ocrd_cis_ocropy_segment_7(ocrd_cis_ocropy_clip_6.out[0], ocrd_cis_ocropy_clip_6.out[1], "OCR-D-CLIP", "OCR-D-SEGMENT-OCROPY") + ocrd_cis_ocropy_dewarp_8(ocrd_cis_ocropy_segment_7.out[0], ocrd_cis_ocropy_segment_7.out[1], "OCR-D-SEGMENT-OCROPY", "OCR-D-DEWARP") + ocrd_tesserocr_recognize_9(ocrd_cis_ocropy_dewarp_8.out[0], ocrd_cis_ocropy_dewarp_8.out[1], "OCR-D-DEWARP", "OCR-D-OCR") merging_mets(ocrd_tesserocr_recognize_9.out[0], ocrd_tesserocr_recognize_9.out[1]) } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf index cadaa38d..7fc2a553 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf @@ -79,18 +79,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -103,18 +101,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -127,18 +123,16 @@ process ocrd_cis_ocropy_denoise_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step2} ocrd-cis-ocropy-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -151,18 +145,16 @@ process ocrd_cis_ocropy_deskew_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-cis-ocropy-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -175,18 +167,16 @@ process ocrd_tesserocr_segment_region_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-segment-region -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"padding": 5.0, "find_tables": false, "dpi": 300}' """ } @@ -199,18 +189,16 @@ process ocrd_segment_repair_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-segment-repair -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' + ${params.env_wrapper_cmd_step5} ocrd-segment-repair -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true, "plausibilize_merge_min_overlap": 0.7}' """ } @@ -223,18 +211,16 @@ process ocrd_cis_ocropy_clip_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-clip -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -247,18 +233,16 @@ process ocrd_cis_ocropy_segment_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' + ${params.env_wrapper_cmd_step7} ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"dpi": 300}' """ } @@ -271,18 +255,16 @@ process ocrd_cis_ocropy_dewarp_8 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step8} ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -295,18 +277,16 @@ process ocrd_tesserocr_recognize_9 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' + ${params.env_wrapper_cmd_step9} ocrd-tesserocr-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"model": "Fraktur"}' """ } @@ -314,14 +294,14 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BINPAGE") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BINPAGE", "OCR-D-SEG-PAGE-ANYOCR") - ocrd_cis_ocropy_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-SEG-PAGE-ANYOCR", "OCR-D-DENOISE-OCROPY") - ocrd_cis_ocropy_deskew_3(ocrd_cis_ocropy_denoise_2.out[0], ocrd_cis_ocropy_denoise_2.out[1], ocrd_cis_ocropy_denoise_2.out[2], "OCR-D-DENOISE-OCROPY", "OCR-D-DESKEW-OCROPY") - ocrd_tesserocr_segment_region_4(ocrd_cis_ocropy_deskew_3.out[0], ocrd_cis_ocropy_deskew_3.out[1], ocrd_cis_ocropy_deskew_3.out[2], "OCR-D-DESKEW-OCROPY", "OCR-D-SEG-BLOCK-TESSERACT") - ocrd_segment_repair_5(ocrd_tesserocr_segment_region_4.out[0], ocrd_tesserocr_segment_region_4.out[1], ocrd_tesserocr_segment_region_4.out[2], "OCR-D-SEG-BLOCK-TESSERACT", "OCR-D-SEGMENT-REPAIR") - ocrd_cis_ocropy_clip_6(ocrd_segment_repair_5.out[0], ocrd_segment_repair_5.out[1], ocrd_segment_repair_5.out[2], "OCR-D-SEGMENT-REPAIR", "OCR-D-CLIP") - ocrd_cis_ocropy_segment_7(ocrd_cis_ocropy_clip_6.out[0], ocrd_cis_ocropy_clip_6.out[1], ocrd_cis_ocropy_clip_6.out[2], "OCR-D-CLIP", "OCR-D-SEGMENT-OCROPY") - ocrd_cis_ocropy_dewarp_8(ocrd_cis_ocropy_segment_7.out[0], ocrd_cis_ocropy_segment_7.out[1], ocrd_cis_ocropy_segment_7.out[2], "OCR-D-SEGMENT-OCROPY", "OCR-D-DEWARP") - ocrd_tesserocr_recognize_9(ocrd_cis_ocropy_dewarp_8.out[0], ocrd_cis_ocropy_dewarp_8.out[1], ocrd_cis_ocropy_dewarp_8.out[2], "OCR-D-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BINPAGE") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BINPAGE", "OCR-D-SEG-PAGE-ANYOCR") + ocrd_cis_ocropy_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-SEG-PAGE-ANYOCR", "OCR-D-DENOISE-OCROPY") + ocrd_cis_ocropy_deskew_3(ocrd_cis_ocropy_denoise_2.out[0], ocrd_cis_ocropy_denoise_2.out[1], "OCR-D-DENOISE-OCROPY", "OCR-D-DESKEW-OCROPY") + ocrd_tesserocr_segment_region_4(ocrd_cis_ocropy_deskew_3.out[0], ocrd_cis_ocropy_deskew_3.out[1], "OCR-D-DESKEW-OCROPY", "OCR-D-SEG-BLOCK-TESSERACT") + ocrd_segment_repair_5(ocrd_tesserocr_segment_region_4.out[0], ocrd_tesserocr_segment_region_4.out[1], "OCR-D-SEG-BLOCK-TESSERACT", "OCR-D-SEGMENT-REPAIR") + ocrd_cis_ocropy_clip_6(ocrd_segment_repair_5.out[0], ocrd_segment_repair_5.out[1], "OCR-D-SEGMENT-REPAIR", "OCR-D-CLIP") + ocrd_cis_ocropy_segment_7(ocrd_cis_ocropy_clip_6.out[0], ocrd_cis_ocropy_clip_6.out[1], "OCR-D-CLIP", "OCR-D-SEGMENT-OCROPY") + ocrd_cis_ocropy_dewarp_8(ocrd_cis_ocropy_segment_7.out[0], ocrd_cis_ocropy_segment_7.out[1], "OCR-D-SEGMENT-OCROPY", "OCR-D-DEWARP") + ocrd_tesserocr_recognize_9(ocrd_cis_ocropy_dewarp_8.out[0], ocrd_cis_ocropy_dewarp_8.out[1], "OCR-D-DEWARP", "OCR-D-OCR") } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf index 42851773..559c6b8b 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf @@ -61,18 +61,16 @@ process ocrd_tesserocr_recognize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' + ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' """ } @@ -97,6 +95,6 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_tesserocr_recognize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-OCR") + ocrd_tesserocr_recognize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-OCR") merging_mets(ocrd_tesserocr_recognize_0.out[0], ocrd_tesserocr_recognize_0.out[1]) } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf index 12a2d9dd..b6e12509 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf @@ -61,18 +61,16 @@ process ocrd_tesserocr_recognize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' + ${params.env_wrapper_cmd_step0} ocrd-tesserocr-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"segmentation_level": "region", "textequiv_level": "word", "find_tables": true, "model": "deu"}' """ } @@ -80,5 +78,5 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_tesserocr_recognize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-OCR") + ocrd_tesserocr_recognize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-OCR") } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf index 4dfa4a67..537ac15f 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf @@ -61,18 +61,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -97,6 +95,6 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") merging_mets(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1]) } diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf index 7d10fdca..0d006429 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf @@ -61,18 +61,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -80,5 +78,5 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") } diff --git a/src/utils/operandi_utils/oton/nf_block_workflow.py b/src/utils/operandi_utils/oton/nf_block_workflow.py index e37b64ec..42cbd81c 100644 --- a/src/utils/operandi_utils/oton/nf_block_workflow.py +++ b/src/utils/operandi_utils/oton/nf_block_workflow.py @@ -1,8 +1,6 @@ from logging import getLevelName, getLogger from typing import List -from operandi_utils.oton.constants import ( - OTON_LOG_LEVEL, PARAMS_KEY_WORKSPACE_DIR, PARAMS_KEY_INPUT_FILE_GRP, PARAMS_KEY_FORKS, SPACES -) +from operandi_utils.oton.constants import OTON_LOG_LEVEL, PARAMS_KEY_INPUT_FILE_GRP, PARAMS_KEY_FORKS, SPACES from operandi_utils.oton.nf_block_process import NextflowBlockProcess class NextflowBlockWorkflow: @@ -37,11 +35,11 @@ def produce_workflow_calls( if previous_nfp is None: workflow_call += ( f'{nf_split_page_ranges.nf_process_name}.out[0], {nf_split_page_ranges.nf_process_name}.out[1], ' - f'{PARAMS_KEY_WORKSPACE_DIR}, {PARAMS_KEY_INPUT_FILE_GRP}, "{out_file_grps}"' + f'{PARAMS_KEY_INPUT_FILE_GRP}, "{out_file_grps}"' ) else: workflow_call += ( - f'{previous_nfp}.out[0], {previous_nfp}.out[1], {previous_nfp}.out[2], "{in_file_grps}",' + f'{previous_nfp}.out[0], {previous_nfp}.out[1], "{in_file_grps}",' f' "{out_file_grps}"' ) workflow_call += ")\n" diff --git a/src/utils/operandi_utils/oton/nf_file_executable.py b/src/utils/operandi_utils/oton/nf_file_executable.py index adc26f5b..437c2bd7 100644 --- a/src/utils/operandi_utils/oton/nf_file_executable.py +++ b/src/utils/operandi_utils/oton/nf_file_executable.py @@ -3,22 +3,10 @@ from operandi_utils.oton.ocrd_validator import ProcessorCallArguments from operandi_utils.oton.constants import ( - BS, CONST_DIR_IN, CONST_DIR_OUT, CONST_PAGE_RANGE, CONST_METS_PATH, CONST_WORKSPACE_DIR, - OTON_LOG_LEVEL, - PARAMS_KEY_INPUT_FILE_GRP, - PARAMS_KEY_METS_PATH, - PARAMS_KEY_WORKSPACE_DIR, - PARAMS_KEY_ENV_WRAPPER_CMD_CORE, - PARAMS_KEY_ENV_WRAPPER_CMD_STEP, - PARAMS_KEY_FORKS, - PARAMS_KEY_PAGES, - PARAMS_KEY_CPUS, - PARAMS_KEY_CPUS_PER_FORK, - PARAMS_KEY_RAM, - PARAMS_KEY_RAM_PER_FORK, - PARAMS_KEY_METS_SOCKET_PATH, - SPACES, - WORKFLOW_COMMENT + BS, CONST_DIR_IN, CONST_DIR_OUT, CONST_PAGE_RANGE, CONST_METS_PATH, OTON_LOG_LEVEL, SPACES, WORKFLOW_COMMENT, + PARAMS_KEY_INPUT_FILE_GRP, PARAMS_KEY_METS_PATH, PARAMS_KEY_WORKSPACE_DIR, PARAMS_KEY_ENV_WRAPPER_CMD_CORE, + PARAMS_KEY_ENV_WRAPPER_CMD_STEP, PARAMS_KEY_FORKS, PARAMS_KEY_PAGES, PARAMS_KEY_CPUS, PARAMS_KEY_CPUS_PER_FORK, + PARAMS_KEY_RAM, PARAMS_KEY_RAM_PER_FORK, PARAMS_KEY_METS_SOCKET_PATH, ) from operandi_utils.oton.nf_block_process import NextflowBlockProcess from operandi_utils.oton.nf_block_workflow import NextflowBlockWorkflow @@ -174,13 +162,11 @@ def build_nextflow_processes( # Add Nextflow process parameters nf_process_block.add_parameter_input(parameter=CONST_METS_PATH, parameter_type='val') nf_process_block.add_parameter_input(parameter=CONST_PAGE_RANGE, parameter_type='val') - nf_process_block.add_parameter_input(parameter=CONST_WORKSPACE_DIR, parameter_type='val') nf_process_block.add_parameter_input(parameter=CONST_DIR_IN, parameter_type='val') nf_process_block.add_parameter_input(parameter=CONST_DIR_OUT, parameter_type='val') nf_process_block.add_parameter_output(parameter=CONST_METS_PATH, parameter_type='val') nf_process_block.add_parameter_output(parameter=CONST_PAGE_RANGE, parameter_type='val') - nf_process_block.add_parameter_output(parameter=CONST_WORKSPACE_DIR, parameter_type='val') self.nf_lines_parameters[f'{PARAMS_KEY_ENV_WRAPPER_CMD_STEP}{index}'] = '"null"' self.nf_blocks_process.append(nf_process_block) index += 1 diff --git a/src/utils/operandi_utils/oton/process_call_arguments.py b/src/utils/operandi_utils/oton/process_call_arguments.py index ba9e5bd1..62f55de3 100644 --- a/src/utils/operandi_utils/oton/process_call_arguments.py +++ b/src/utils/operandi_utils/oton/process_call_arguments.py @@ -2,8 +2,8 @@ from logging import getLevelName, getLogger from typing import Optional from operandi_utils.oton.constants import ( - BS, CONST_DIR_IN, CONST_DIR_OUT, CONST_WORKSPACE_DIR, CONST_METS_PATH, CONST_PAGE_RANGE, CONST_METS_SOCKET_PATH, - OCRD_ALL_JSON, OTON_LOG_LEVEL + BS, CONST_DIR_IN, CONST_DIR_OUT, CONST_METS_PATH, CONST_PAGE_RANGE, OCRD_ALL_JSON, OTON_LOG_LEVEL, + PARAMS_KEY_METS_SOCKET_PATH, PARAMS_KEY_WORKSPACE_DIR ) # This class is based on ocrd.task_sequence.ProcessorTask @@ -55,8 +55,8 @@ def dump_bash_form_with_phs(self, with_mets_socket: bool, with_page_id: bool = T dump = '' dump += f'{self.executable}' if with_mets_socket: - dump += f' -U ${BS[0]}{CONST_METS_SOCKET_PATH}{BS[1]}' - dump += f' -w ${BS[0]}{CONST_WORKSPACE_DIR}{BS[1]}' + dump += f' -U ${BS[0]}{PARAMS_KEY_METS_SOCKET_PATH}{BS[1]}' + dump += f' -w ${BS[0]}{PARAMS_KEY_WORKSPACE_DIR}{BS[1]}' dump += f' -m ${BS[0]}{CONST_METS_PATH}{BS[1]}' if with_page_id: dump += f' --page-id ${BS[0]}{CONST_PAGE_RANGE}{BS[1]}' diff --git a/tests/assets/oton/constants.py b/tests/assets/oton/constants.py index 6e540e84..67fc854e 100644 --- a/tests/assets/oton/constants.py +++ b/tests/assets/oton/constants.py @@ -27,14 +27,14 @@ main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1]) } """ @@ -44,14 +44,14 @@ main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") } """ @@ -60,13 +60,13 @@ main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_3(ocrd_skimage_denoise_2.out[0], ocrd_skimage_denoise_2.out[1], ocrd_skimage_denoise_2.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_tesserocr_segment_4(ocrd_tesserocr_deskew_3.out[0], ocrd_tesserocr_deskew_3.out[1], ocrd_tesserocr_deskew_3.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_5(ocrd_tesserocr_segment_4.out[0], ocrd_tesserocr_segment_4.out[1], ocrd_tesserocr_segment_4.out[2], "OCR-D-SEG", "OCR-D-SEG-DEWARP") - ocrd_tesserocr_recognize_6(ocrd_cis_ocropy_dewarp_5.out[0], ocrd_cis_ocropy_dewarp_5.out[1], ocrd_cis_ocropy_dewarp_5.out[2], "OCR-D-SEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_3(ocrd_skimage_denoise_2.out[0], ocrd_skimage_denoise_2.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_tesserocr_segment_4(ocrd_tesserocr_deskew_3.out[0], ocrd_tesserocr_deskew_3.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_5(ocrd_tesserocr_segment_4.out[0], ocrd_tesserocr_segment_4.out[1], "OCR-D-SEG", "OCR-D-SEG-DEWARP") + ocrd_tesserocr_recognize_6(ocrd_cis_ocropy_dewarp_5.out[0], ocrd_cis_ocropy_dewarp_5.out[1], "OCR-D-SEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_tesserocr_recognize_6.out[0], ocrd_tesserocr_recognize_6.out[1]) } """ @@ -76,9 +76,9 @@ main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_dinglehopper_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-EVAL-SEG-BLOCK") - ocrd_dinglehopper_1(ocrd_dinglehopper_0.out[0], ocrd_dinglehopper_0.out[1], ocrd_dinglehopper_0.out[2], "OCR-D-GT-SEG-LINE,OCR-D-OCR", "OCR-D-EVAL-SEG-LINE") - ocrd_dinglehopper_2(ocrd_dinglehopper_1.out[0], ocrd_dinglehopper_1.out[1], ocrd_dinglehopper_1.out[2], "OCR-D-GT-SEG-PAGE,OCR-D-OCR", "OCR-D-EVAL-SEG-PAGE") + ocrd_dinglehopper_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-EVAL-SEG-BLOCK") + ocrd_dinglehopper_1(ocrd_dinglehopper_0.out[0], ocrd_dinglehopper_0.out[1], "OCR-D-GT-SEG-LINE,OCR-D-OCR", "OCR-D-EVAL-SEG-LINE") + ocrd_dinglehopper_2(ocrd_dinglehopper_1.out[0], ocrd_dinglehopper_1.out[1], "OCR-D-GT-SEG-PAGE,OCR-D-OCR", "OCR-D-EVAL-SEG-PAGE") merging_mets(ocrd_dinglehopper_2.out[0], ocrd_dinglehopper_2.out[1]) } """ @@ -88,19 +88,19 @@ main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_olena_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_olena_binarize_0.out[0], ocrd_olena_binarize_0.out[1], ocrd_olena_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_olena_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_cis_ocropy_denoise_3(ocrd_olena_binarize_2.out[0], ocrd_olena_binarize_2.out[1], ocrd_olena_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_cis_ocropy_deskew_4(ocrd_cis_ocropy_denoise_3.out[0], ocrd_cis_ocropy_denoise_3.out[1], ocrd_cis_ocropy_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_tesserocr_segment_region_5(ocrd_cis_ocropy_deskew_4.out[0], ocrd_cis_ocropy_deskew_4.out[1], ocrd_cis_ocropy_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG-REG") - ocrd_segment_repair_6(ocrd_tesserocr_segment_region_5.out[0], ocrd_tesserocr_segment_region_5.out[1], ocrd_tesserocr_segment_region_5.out[2], "OCR-D-SEG-REG", "OCR-D-SEG-REPAIR") - ocrd_cis_ocropy_deskew_7(ocrd_segment_repair_6.out[0], ocrd_segment_repair_6.out[1], ocrd_segment_repair_6.out[2], "OCR-D-SEG-REPAIR", "OCR-D-SEG-REG-DESKEW") - ocrd_cis_ocropy_clip_8(ocrd_cis_ocropy_deskew_7.out[0], ocrd_cis_ocropy_deskew_7.out[1], ocrd_cis_ocropy_deskew_7.out[2], "OCR-D-SEG-REG-DESKEW", "OCR-D-SEG-REG-DESKEW-CLIP") - ocrd_tesserocr_segment_line_9(ocrd_cis_ocropy_clip_8.out[0], ocrd_cis_ocropy_clip_8.out[1], ocrd_cis_ocropy_clip_8.out[2], "OCR-D-SEG-REG-DESKEW-CLIP", "OCR-D-SEG-LINE") - ocrd_segment_repair_10(ocrd_tesserocr_segment_line_9.out[0], ocrd_tesserocr_segment_line_9.out[1], ocrd_tesserocr_segment_line_9.out[2], "OCR-D-SEG-LINE", "OCR-D-SEG-REPAIR-LINE") - ocrd_cis_ocropy_dewarp_11(ocrd_segment_repair_10.out[0], ocrd_segment_repair_10.out[1], ocrd_segment_repair_10.out[2], "OCR-D-SEG-REPAIR-LINE", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_12(ocrd_cis_ocropy_dewarp_11.out[0], ocrd_cis_ocropy_dewarp_11.out[1], ocrd_cis_ocropy_dewarp_11.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_olena_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_olena_binarize_0.out[0], ocrd_olena_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_olena_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_cis_ocropy_denoise_3(ocrd_olena_binarize_2.out[0], ocrd_olena_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_cis_ocropy_deskew_4(ocrd_cis_ocropy_denoise_3.out[0], ocrd_cis_ocropy_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_tesserocr_segment_region_5(ocrd_cis_ocropy_deskew_4.out[0], ocrd_cis_ocropy_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG-REG") + ocrd_segment_repair_6(ocrd_tesserocr_segment_region_5.out[0], ocrd_tesserocr_segment_region_5.out[1], "OCR-D-SEG-REG", "OCR-D-SEG-REPAIR") + ocrd_cis_ocropy_deskew_7(ocrd_segment_repair_6.out[0], ocrd_segment_repair_6.out[1], "OCR-D-SEG-REPAIR", "OCR-D-SEG-REG-DESKEW") + ocrd_cis_ocropy_clip_8(ocrd_cis_ocropy_deskew_7.out[0], ocrd_cis_ocropy_deskew_7.out[1], "OCR-D-SEG-REG-DESKEW", "OCR-D-SEG-REG-DESKEW-CLIP") + ocrd_tesserocr_segment_line_9(ocrd_cis_ocropy_clip_8.out[0], ocrd_cis_ocropy_clip_8.out[1], "OCR-D-SEG-REG-DESKEW-CLIP", "OCR-D-SEG-LINE") + ocrd_segment_repair_10(ocrd_tesserocr_segment_line_9.out[0], ocrd_tesserocr_segment_line_9.out[1], "OCR-D-SEG-LINE", "OCR-D-SEG-REPAIR-LINE") + ocrd_cis_ocropy_dewarp_11(ocrd_segment_repair_10.out[0], ocrd_segment_repair_10.out[1], "OCR-D-SEG-REPAIR-LINE", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_12(ocrd_cis_ocropy_dewarp_11.out[0], ocrd_cis_ocropy_dewarp_11.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_12.out[0], ocrd_calamari_recognize_12.out[1]) } """ diff --git a/tests/assets/oton/test_output_nextflow1_apptainer.nf b/tests/assets/oton/test_output_nextflow1_apptainer.nf index d3ddb39b..4e9c0699 100644 --- a/tests/assets/oton/test_output_nextflow1_apptainer.nf +++ b/tests/assets/oton/test_output_nextflow1_apptainer.nf @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -243,18 +229,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -279,13 +263,13 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1]) } diff --git a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf index 8abb7aa7..ba9bf528 100644 --- a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -243,18 +229,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -262,12 +246,12 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") } diff --git a/tests/assets/oton/test_output_nextflow1_docker.nf b/tests/assets/oton/test_output_nextflow1_docker.nf index 7598d3d7..fdf03c09 100644 --- a/tests/assets/oton/test_output_nextflow1_docker.nf +++ b/tests/assets/oton/test_output_nextflow1_docker.nf @@ -63,18 +63,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -85,18 +83,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -107,18 +103,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -129,18 +123,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -151,18 +143,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -173,18 +163,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -195,18 +183,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -217,18 +203,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -251,13 +235,13 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1]) } diff --git a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf index be421dc3..05e8966d 100644 --- a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf @@ -63,18 +63,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -85,18 +83,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -107,18 +103,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -129,18 +123,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -151,18 +143,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -173,18 +163,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -195,18 +183,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -217,18 +203,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -236,12 +220,12 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") } diff --git a/tests/assets/oton/test_output_nextflow1_local.nf b/tests/assets/oton/test_output_nextflow1_local.nf index 726402e8..ced217c9 100644 --- a/tests/assets/oton/test_output_nextflow1_local.nf +++ b/tests/assets/oton/test_output_nextflow1_local.nf @@ -61,18 +61,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -83,18 +81,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -105,18 +101,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ocrd-skimage-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -127,18 +121,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -149,18 +141,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -171,18 +161,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -193,18 +181,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -215,18 +201,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -249,13 +233,13 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1]) } diff --git a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf index e040aea1..e11f4346 100644 --- a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf @@ -61,18 +61,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -83,18 +81,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -105,18 +101,16 @@ process ocrd_skimage_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' + ocrd-skimage-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}' """ } @@ -127,18 +121,16 @@ process ocrd_skimage_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-skimage-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -149,18 +141,16 @@ process ocrd_tesserocr_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ocrd-tesserocr-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -171,18 +161,16 @@ process ocrd_cis_ocropy_segment_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -193,18 +181,16 @@ process ocrd_cis_ocropy_dewarp_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -215,18 +201,16 @@ process ocrd_calamari_recognize_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ocrd-calamari-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -234,12 +218,12 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") } diff --git a/tests/assets/oton/test_output_nextflow2.nf b/tests/assets/oton/test_output_nextflow2.nf index 62b4c6be..a2798bdf 100644 --- a/tests/assets/oton/test_output_nextflow2.nf +++ b/tests/assets/oton/test_output_nextflow2.nf @@ -59,18 +59,16 @@ process ocrd_cis_ocropy_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -81,18 +79,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -103,18 +99,16 @@ process ocrd_skimage_denoise_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -125,18 +119,16 @@ process ocrd_tesserocr_deskew_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' + ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}' """ } @@ -147,18 +139,16 @@ process ocrd_tesserocr_segment_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"shrink_polygons": true}' + ocrd-tesserocr-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"shrink_polygons": true}' """ } @@ -169,18 +159,16 @@ process ocrd_cis_ocropy_dewarp_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -191,18 +179,16 @@ process ocrd_tesserocr_recognize_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"textequiv_level": "glyph", "overwrite_segments": true, "model": "GT4HistOCR_50000000.997_191951"}' + ocrd-tesserocr-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"textequiv_level": "glyph", "overwrite_segments": true, "model": "GT4HistOCR_50000000.997_191951"}' """ } @@ -225,12 +211,12 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_skimage_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN-DENOISE") - ocrd_tesserocr_deskew_3(ocrd_skimage_denoise_2.out[0], ocrd_skimage_denoise_2.out[1], ocrd_skimage_denoise_2.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_tesserocr_segment_4(ocrd_tesserocr_deskew_3.out[0], ocrd_tesserocr_deskew_3.out[1], ocrd_tesserocr_deskew_3.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") - ocrd_cis_ocropy_dewarp_5(ocrd_tesserocr_segment_4.out[0], ocrd_tesserocr_segment_4.out[1], ocrd_tesserocr_segment_4.out[2], "OCR-D-SEG", "OCR-D-SEG-DEWARP") - ocrd_tesserocr_recognize_6(ocrd_cis_ocropy_dewarp_5.out[0], ocrd_cis_ocropy_dewarp_5.out[1], ocrd_cis_ocropy_dewarp_5.out[2], "OCR-D-SEG-DEWARP", "OCR-D-OCR") + ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_skimage_denoise_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN-DENOISE") + ocrd_tesserocr_deskew_3(ocrd_skimage_denoise_2.out[0], ocrd_skimage_denoise_2.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_tesserocr_segment_4(ocrd_tesserocr_deskew_3.out[0], ocrd_tesserocr_deskew_3.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG") + ocrd_cis_ocropy_dewarp_5(ocrd_tesserocr_segment_4.out[0], ocrd_tesserocr_segment_4.out[1], "OCR-D-SEG", "OCR-D-SEG-DEWARP") + ocrd_tesserocr_recognize_6(ocrd_cis_ocropy_dewarp_5.out[0], ocrd_cis_ocropy_dewarp_5.out[1], "OCR-D-SEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_tesserocr_recognize_6.out[0], ocrd_tesserocr_recognize_6.out[1]) } diff --git a/tests/assets/oton/test_output_nextflow3.nf b/tests/assets/oton/test_output_nextflow3.nf index 5d2c84b1..8e8df4ac 100644 --- a/tests/assets/oton/test_output_nextflow3.nf +++ b/tests/assets/oton/test_output_nextflow3.nf @@ -51,18 +51,16 @@ process ocrd_dinglehopper_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-dinglehopper -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-dinglehopper -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -73,18 +71,16 @@ process ocrd_dinglehopper_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-dinglehopper -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-dinglehopper -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -95,18 +91,16 @@ process ocrd_dinglehopper_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-dinglehopper -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-dinglehopper -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -129,8 +123,8 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_dinglehopper_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-EVAL-SEG-BLOCK") - ocrd_dinglehopper_1(ocrd_dinglehopper_0.out[0], ocrd_dinglehopper_0.out[1], ocrd_dinglehopper_0.out[2], "OCR-D-GT-SEG-LINE,OCR-D-OCR", "OCR-D-EVAL-SEG-LINE") - ocrd_dinglehopper_2(ocrd_dinglehopper_1.out[0], ocrd_dinglehopper_1.out[1], ocrd_dinglehopper_1.out[2], "OCR-D-GT-SEG-PAGE,OCR-D-OCR", "OCR-D-EVAL-SEG-PAGE") + ocrd_dinglehopper_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-EVAL-SEG-BLOCK") + ocrd_dinglehopper_1(ocrd_dinglehopper_0.out[0], ocrd_dinglehopper_0.out[1], "OCR-D-GT-SEG-LINE,OCR-D-OCR", "OCR-D-EVAL-SEG-LINE") + ocrd_dinglehopper_2(ocrd_dinglehopper_1.out[0], ocrd_dinglehopper_1.out[1], "OCR-D-GT-SEG-PAGE,OCR-D-OCR", "OCR-D-EVAL-SEG-PAGE") merging_mets(ocrd_dinglehopper_2.out[0], ocrd_dinglehopper_2.out[1]) } diff --git a/tests/assets/oton/test_output_nextflow4.nf b/tests/assets/oton/test_output_nextflow4.nf index e7eb76b3..7f5c313d 100644 --- a/tests/assets/oton/test_output_nextflow4.nf +++ b/tests/assets/oton/test_output_nextflow4.nf @@ -71,18 +71,16 @@ process ocrd_olena_binarize_0 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-olena-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"impl": "sauvola"}' + ocrd-olena-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"impl": "sauvola"}' """ } @@ -93,18 +91,16 @@ process ocrd_anybaseocr_crop_1 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -115,18 +111,16 @@ process ocrd_olena_binarize_2 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-olena-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"impl": "kim"}' + ocrd-olena-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"impl": "kim"}' """ } @@ -137,18 +131,16 @@ process ocrd_cis_ocropy_denoise_3 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-cis-ocropy-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -159,18 +151,16 @@ process ocrd_cis_ocropy_deskew_4 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' + ocrd-cis-ocropy-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}' """ } @@ -181,18 +171,16 @@ process ocrd_tesserocr_segment_region_5 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-segment-region -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-tesserocr-segment-region -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -203,18 +191,16 @@ process ocrd_segment_repair_6 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-segment-repair -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true}' + ocrd-segment-repair -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"plausibilize": true}' """ } @@ -225,18 +211,16 @@ process ocrd_cis_ocropy_deskew_7 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "region"}' + ocrd-cis-ocropy-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "region"}' """ } @@ -247,18 +231,16 @@ process ocrd_cis_ocropy_clip_8 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-clip -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "region"}' + ocrd-cis-ocropy-clip -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "region"}' """ } @@ -269,18 +251,16 @@ process ocrd_tesserocr_segment_line_9 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-tesserocr-segment-line -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-tesserocr-segment-line -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -291,18 +271,16 @@ process ocrd_segment_repair_10 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-segment-repair -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"sanitize": true}' + ocrd-segment-repair -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"sanitize": true}' """ } @@ -313,18 +291,16 @@ process ocrd_cis_ocropy_dewarp_11 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} + ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} """ } @@ -335,18 +311,16 @@ process ocrd_calamari_recognize_12 { input: val mets_path val page_range - val workspace_dir val input_group val output_group output: val mets_path val page_range - val workspace_dir script: """ - ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' + ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}' """ } @@ -369,18 +343,18 @@ workflow { main: ch_range_multipliers = Channel.of(0..params.forks.intValue()-1) split_page_ranges(ch_range_multipliers) - ocrd_olena_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN") - ocrd_anybaseocr_crop_1(ocrd_olena_binarize_0.out[0], ocrd_olena_binarize_0.out[1], ocrd_olena_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP") - ocrd_olena_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2") - ocrd_cis_ocropy_denoise_3(ocrd_olena_binarize_2.out[0], ocrd_olena_binarize_2.out[1], ocrd_olena_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") - ocrd_cis_ocropy_deskew_4(ocrd_cis_ocropy_denoise_3.out[0], ocrd_cis_ocropy_denoise_3.out[1], ocrd_cis_ocropy_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") - ocrd_tesserocr_segment_region_5(ocrd_cis_ocropy_deskew_4.out[0], ocrd_cis_ocropy_deskew_4.out[1], ocrd_cis_ocropy_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG-REG") - ocrd_segment_repair_6(ocrd_tesserocr_segment_region_5.out[0], ocrd_tesserocr_segment_region_5.out[1], ocrd_tesserocr_segment_region_5.out[2], "OCR-D-SEG-REG", "OCR-D-SEG-REPAIR") - ocrd_cis_ocropy_deskew_7(ocrd_segment_repair_6.out[0], ocrd_segment_repair_6.out[1], ocrd_segment_repair_6.out[2], "OCR-D-SEG-REPAIR", "OCR-D-SEG-REG-DESKEW") - ocrd_cis_ocropy_clip_8(ocrd_cis_ocropy_deskew_7.out[0], ocrd_cis_ocropy_deskew_7.out[1], ocrd_cis_ocropy_deskew_7.out[2], "OCR-D-SEG-REG-DESKEW", "OCR-D-SEG-REG-DESKEW-CLIP") - ocrd_tesserocr_segment_line_9(ocrd_cis_ocropy_clip_8.out[0], ocrd_cis_ocropy_clip_8.out[1], ocrd_cis_ocropy_clip_8.out[2], "OCR-D-SEG-REG-DESKEW-CLIP", "OCR-D-SEG-LINE") - ocrd_segment_repair_10(ocrd_tesserocr_segment_line_9.out[0], ocrd_tesserocr_segment_line_9.out[1], ocrd_tesserocr_segment_line_9.out[2], "OCR-D-SEG-LINE", "OCR-D-SEG-REPAIR-LINE") - ocrd_cis_ocropy_dewarp_11(ocrd_segment_repair_10.out[0], ocrd_segment_repair_10.out[1], ocrd_segment_repair_10.out[2], "OCR-D-SEG-REPAIR-LINE", "OCR-D-SEG-LINE-RESEG-DEWARP") - ocrd_calamari_recognize_12(ocrd_cis_ocropy_dewarp_11.out[0], ocrd_cis_ocropy_dewarp_11.out[1], ocrd_cis_ocropy_dewarp_11.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") + ocrd_olena_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN") + ocrd_anybaseocr_crop_1(ocrd_olena_binarize_0.out[0], ocrd_olena_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP") + ocrd_olena_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2") + ocrd_cis_ocropy_denoise_3(ocrd_olena_binarize_2.out[0], ocrd_olena_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE") + ocrd_cis_ocropy_deskew_4(ocrd_cis_ocropy_denoise_3.out[0], ocrd_cis_ocropy_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW") + ocrd_tesserocr_segment_region_5(ocrd_cis_ocropy_deskew_4.out[0], ocrd_cis_ocropy_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG-REG") + ocrd_segment_repair_6(ocrd_tesserocr_segment_region_5.out[0], ocrd_tesserocr_segment_region_5.out[1], "OCR-D-SEG-REG", "OCR-D-SEG-REPAIR") + ocrd_cis_ocropy_deskew_7(ocrd_segment_repair_6.out[0], ocrd_segment_repair_6.out[1], "OCR-D-SEG-REPAIR", "OCR-D-SEG-REG-DESKEW") + ocrd_cis_ocropy_clip_8(ocrd_cis_ocropy_deskew_7.out[0], ocrd_cis_ocropy_deskew_7.out[1], "OCR-D-SEG-REG-DESKEW", "OCR-D-SEG-REG-DESKEW-CLIP") + ocrd_tesserocr_segment_line_9(ocrd_cis_ocropy_clip_8.out[0], ocrd_cis_ocropy_clip_8.out[1], "OCR-D-SEG-REG-DESKEW-CLIP", "OCR-D-SEG-LINE") + ocrd_segment_repair_10(ocrd_tesserocr_segment_line_9.out[0], ocrd_tesserocr_segment_line_9.out[1], "OCR-D-SEG-LINE", "OCR-D-SEG-REPAIR-LINE") + ocrd_cis_ocropy_dewarp_11(ocrd_segment_repair_10.out[0], ocrd_segment_repair_10.out[1], "OCR-D-SEG-REPAIR-LINE", "OCR-D-SEG-LINE-RESEG-DEWARP") + ocrd_calamari_recognize_12(ocrd_cis_ocropy_dewarp_11.out[0], ocrd_cis_ocropy_dewarp_11.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR") merging_mets(ocrd_calamari_recognize_12.out[0], ocrd_calamari_recognize_12.out[1]) } From 906261313edd3c4eefd960e0a23b8570ef33233e Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 15:15:02 +0100 Subject: [PATCH 07/29] use odem workflow for integration test --- tests/integration_tests/test_full_cycle.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration_tests/test_full_cycle.py b/tests/integration_tests/test_full_cycle.py index acc1cec4..93668ba1 100644 --- a/tests/integration_tests/test_full_cycle.py +++ b/tests/integration_tests/test_full_cycle.py @@ -73,14 +73,14 @@ def test_full_cycle(auth_harvester, operandi, service_broker, bytes_small_worksp remove_file_grps_list_sbb = [] # Post workflow job - workflow_id = "sbb_workflow" + workflow_id = "odem_workflow_with_MS" input_file_grp = DEFAULT_FILE_GRP req_data = { "workflow_id": workflow_id, "workflow_args": { "workspace_id": workspace_id, "input_file_grp": input_file_grp, - "remove_file_grps": ",".join(remove_file_grps_list_sbb), + "remove_file_grps": ",".join(remove_file_grps_list_odem), "mets_name": DEFAULT_METS_BASENAME }, "sbatch_args": {"partition": HPC_NHR_JOB_TEST_PARTITION, "cpus": 8, "ram": 32} From 6ffd3fb4382c5e17611840c30ade345b5c906711 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 15:26:45 +0100 Subject: [PATCH 08/29] fix: file groups removal --- .../hpc/batch_scripts/batch_submit_workflow_job.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh index cc2dcafa..de04c52b 100755 --- a/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh +++ b/src/utils/operandi_utils/hpc/batch_scripts/batch_submit_workflow_job.sh @@ -230,6 +230,8 @@ remove_file_group_from_workspace() { echo "Removing file group: $1" REMOVE_FILE_GROUP_COMMAND="${REMOVE_FILE_GROUP_COMMAND//FILE_GROUP_PLACEHOLDER/$1}" eval "$REMOVE_FILE_GROUP_COMMAND" + # Set the placeholder again for future invocations + REMOVE_FILE_GROUP_COMMAND="${REMOVE_FILE_GROUP_COMMAND//$1/FILE_GROUP_PLACEHOLDER}" } remove_file_groups_from_workspace() { From e08aa9a2995eaa2f595d61f495d42a1135dc9598 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 15:37:32 +0100 Subject: [PATCH 09/29] turn on again: core caching --- src/utils/operandi_utils/hpc/nhr_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/operandi_utils/hpc/nhr_executor.py b/src/utils/operandi_utils/hpc/nhr_executor.py index c1bd4aa0..baaa02ba 100644 --- a/src/utils/operandi_utils/hpc/nhr_executor.py +++ b/src/utils/operandi_utils/hpc/nhr_executor.py @@ -206,7 +206,7 @@ def cmd_nextflow_run( apptainer_cmd = f"apptainer exec --bind {hpc_ws_dir}:/ws_data --bind {bind_ocrd_models}" # Mets caching is disabled for the core, to avoid the cache error # when mergin mets files https://github.com/OCR-D/core/issues/1297 - apptainer_cmd_core = f"{apptainer_cmd} --env OCRD_METS_CACHING=false" + apptainer_cmd_core = f"{apptainer_cmd} --env OCRD_METS_CACHING=true" apptainer_cmd_step = f"{apptainer_cmd} --env OCRD_METS_CACHING=true" apptainer_image = sif_core if use_slim_images else sif_ocrd_all core_command = f"{apptainer_cmd_core} {PH_NODE_DIR_PROCESSOR_SIFS}/{apptainer_image}" From 88f4cedb2a2db0c856a7907e8c0b43539cc5fa6a Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 15:38:10 +0100 Subject: [PATCH 10/29] release: v2.18.1 --- src/utils/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/setup.py b/src/utils/setup.py index 8a37bb30..1fcdc071 100644 --- a/src/utils/setup.py +++ b/src/utils/setup.py @@ -5,7 +5,7 @@ setup( name='operandi_utils', - version='2.18.0', + version='2.18.1', description='OPERANDI - Utils', long_description=open('README.md').read(), long_description_content_type='text/markdown', From 920d1fc9497d5ac9dfa5ba7a640c380f3e559382 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Wed, 4 Dec 2024 15:51:12 +0100 Subject: [PATCH 11/29] disable: core mets caching --- src/utils/operandi_utils/hpc/nhr_executor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/operandi_utils/hpc/nhr_executor.py b/src/utils/operandi_utils/hpc/nhr_executor.py index baaa02ba..c1bd4aa0 100644 --- a/src/utils/operandi_utils/hpc/nhr_executor.py +++ b/src/utils/operandi_utils/hpc/nhr_executor.py @@ -206,7 +206,7 @@ def cmd_nextflow_run( apptainer_cmd = f"apptainer exec --bind {hpc_ws_dir}:/ws_data --bind {bind_ocrd_models}" # Mets caching is disabled for the core, to avoid the cache error # when mergin mets files https://github.com/OCR-D/core/issues/1297 - apptainer_cmd_core = f"{apptainer_cmd} --env OCRD_METS_CACHING=true" + apptainer_cmd_core = f"{apptainer_cmd} --env OCRD_METS_CACHING=false" apptainer_cmd_step = f"{apptainer_cmd} --env OCRD_METS_CACHING=true" apptainer_image = sif_core if use_slim_images else sif_ocrd_all core_command = f"{apptainer_cmd_core} {PH_NODE_DIR_PROCESSOR_SIFS}/{apptainer_image}" From 9d0efc5f0ade7dc336c395721123695bf36eb1d2 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 5 Dec 2024 13:42:36 +0100 Subject: [PATCH 12/29] docker compose: use tag --- docker-compose_image_based.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose_image_based.yml b/docker-compose_image_based.yml index 8d85b105..97c4f18e 100644 --- a/docker-compose_image_based.yml +++ b/docker-compose_image_based.yml @@ -54,7 +54,7 @@ services: retries: 120 operandi-server: - image: ghcr.io/subugoe/operandi-server:main + image: ghcr.io/subugoe/operandi-server:latest container_name: operandi-server depends_on: operandi-rabbitmq: @@ -84,7 +84,7 @@ services: command: operandi-server start operandi-broker: - image: ghcr.io/subugoe/operandi-broker:main + image: ghcr.io/subugoe/operandi-broker:latest container_name: operandi-broker depends_on: operandi-rabbitmq: From ff8b4c9bed15dc0b3b20a9aab899a9ebc85f4ca8 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 5 Dec 2024 15:37:49 +0100 Subject: [PATCH 13/29] revert: rabbitmq heartbeat to 0 --- src/utils/operandi_utils/rabbitmq/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/operandi_utils/rabbitmq/constants.py b/src/utils/operandi_utils/rabbitmq/constants.py index e260ca39..7261fcab 100644 --- a/src/utils/operandi_utils/rabbitmq/constants.py +++ b/src/utils/operandi_utils/rabbitmq/constants.py @@ -6,7 +6,7 @@ RABBITMQ_QUEUE_USERS: str = "operandi_queue_users" # Defines how often the RabbitMQ broker will send requests to the clients to verify their live state -HEARTBEAT: int = 60 +HEARTBEAT: int = 0 # Wait seconds before next reconnect try RECONNECT_WAIT: int = 3 # Reconnect tries before timeout From f4270758d6cd5855678b95f612bf0aba454f6b17 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 5 Dec 2024 15:38:15 +0100 Subject: [PATCH 14/29] release: v2.18.2 --- src/utils/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/setup.py b/src/utils/setup.py index 1fcdc071..858b9429 100644 --- a/src/utils/setup.py +++ b/src/utils/setup.py @@ -5,7 +5,7 @@ setup( name='operandi_utils', - version='2.18.1', + version='2.18.2', description='OPERANDI - Utils', long_description=open('README.md').read(), long_description_content_type='text/markdown', From 0880f5ad4c0a88d46a239209fb17c6be37012ec7 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 15:04:52 +0100 Subject: [PATCH 15/29] get rid of generated module version in NF workflow scripts --- .../operandi_utils/hpc/nextflow_workflows/default_workflow.nf | 2 +- .../hpc/nextflow_workflows/default_workflow_with_MS.nf | 2 +- .../operandi_utils/hpc/nextflow_workflows/odem_workflow.nf | 2 +- .../hpc/nextflow_workflows/odem_workflow_with_MS.nf | 2 +- .../operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf | 2 +- .../hpc/nextflow_workflows/sbb_workflow_with_MS.nf | 2 +- .../operandi_utils/hpc/nextflow_workflows/template_workflow.nf | 2 +- .../hpc/nextflow_workflows/template_workflow_with_MS.nf | 2 +- src/utils/operandi_utils/oton/constants.py | 3 +-- 9 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf index 4e9c0699..464b2c69 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf index ba9bf528..6be5d920 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf index a40c8b33..a22b221a 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf index 7fc2a553..4b7b94d8 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf index 559c6b8b..0dcc63a0 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf index b6e12509..840fae0e 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf index 537ac15f..859958d0 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf index 0d006429..ddbe7319 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/oton/constants.py b/src/utils/operandi_utils/oton/constants.py index 180fc94b..9376f844 100644 --- a/src/utils/operandi_utils/oton/constants.py +++ b/src/utils/operandi_utils/oton/constants.py @@ -1,7 +1,6 @@ from json import load from os import environ from pkg_resources import resource_filename -from operandi_utils.constants import OPERANDI_VERSION BS: str = '{}' SPACES = ' ' @@ -32,4 +31,4 @@ PARAMS_KEY_CPUS_PER_FORK: str = 'params.cpus_per_fork' PARAMS_KEY_RAM_PER_FORK: str = 'params.ram_per_fork' -WORKFLOW_COMMENT = f"// This workflow was automatically generated by the v{OPERANDI_VERSION} operandi_utils.oton module" +WORKFLOW_COMMENT = f"// This workflow was automatically generated by the operandi_utils.oton module" From 1709f307af586ad9ff665b3ecaffeba22e639bc4 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 15:35:31 +0100 Subject: [PATCH 16/29] refactor: user auth invokation --- .../operandi_server/routers/admin_panel.py | 5 +-- .../operandi_server/routers/discovery.py | 9 ++-- src/server/operandi_server/routers/user.py | 29 ++++-------- .../operandi_server/routers/user_utils.py | 45 +++++++++++++------ .../operandi_server/routers/workflow.py | 21 +++++---- .../operandi_server/routers/workspace.py | 17 ++++--- 6 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 1539fc49..0a21a4f3 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -11,13 +11,12 @@ db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user, db_get_all_workflows_by_user ) from operandi_utils.utils import send_bag_to_ola_hd -from .user import RouterUser +from .user_utils import user_auth_with_handling from .workspace_utils import create_workspace_bag, get_db_workspace_with_handling, validate_bag_with_handling class RouterAdminPanel: def __init__(self): self.logger = getLogger("operandi_server.routers.user") - self.user_authenticator = RouterUser() self.router = APIRouter(tags=[ServerApiTag.ADMIN]) self.router.add_api_route( path="/admin/users", @@ -51,7 +50,7 @@ def __init__(self): ) async def auth_admin_with_handling(self, auth: HTTPBasicCredentials): - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) if py_user_action.account_type != AccountType.ADMIN: message = f"Admin privileges required for the endpoint" self.logger.error(f"{message}") diff --git a/src/server/operandi_server/routers/discovery.py b/src/server/operandi_server/routers/discovery.py index a2f8de7a..ef674b04 100644 --- a/src/server/operandi_server/routers/discovery.py +++ b/src/server/operandi_server/routers/discovery.py @@ -12,13 +12,12 @@ from operandi_utils.constants import ServerApiTag from operandi_utils.oton.constants import OCRD_ALL_JSON from operandi_server.models import PYDiscovery -from .user import RouterUser +from .user_utils import user_auth_with_handling class RouterDiscovery: def __init__(self): self.logger = getLogger("operandi_server.routers.discovery") - self.user_authenticator = RouterUser() self.router = APIRouter(tags=[ServerApiTag.DISCOVERY]) self.router.add_api_route( @@ -39,7 +38,7 @@ def __init__(self): ) async def discovery(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> PYDiscovery: - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) response = PYDiscovery( ram=virtual_memory().total / (1024.0 ** 3), cpu_cores=cpu_count(), @@ -52,7 +51,7 @@ async def discovery(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> return response async def get_processor_names(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> List[str]: - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) try: processor_names = list(OCRD_ALL_JSON.keys()) return processor_names @@ -65,7 +64,7 @@ async def get_processor_names(self, auth: HTTPBasicCredentials = Depends(HTTPBas raise HTTPException(status_code=500, detail="An unexpected error occurred while loading processor names.") async def get_processor_info(self, processor_name: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> Dict: - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) try: if processor_name not in OCRD_ALL_JSON: raise HTTPException(status_code=404, detail=f"Processor '{processor_name}' not found.") diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index 600a1e09..1c4f8b96 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -1,7 +1,7 @@ from logging import getLogger from typing import List, Optional from datetime import datetime -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, status from fastapi.security import HTTPBasic, HTTPBasicCredentials from operandi_utils.constants import AccountType, ServerApiTag @@ -9,10 +9,9 @@ db_get_processing_stats, db_get_all_workflow_jobs_by_user, db_get_user_account_with_email, db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user, db_get_all_workflows_by_user ) -from operandi_server.exceptions import AuthenticationError from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics -from .user_utils import user_auth, user_register_with_handling +from .user_utils import user_auth_with_handling, user_register_with_handling class RouterUser: @@ -60,19 +59,7 @@ async def user_login(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> """ Used for user authentication. """ - email = auth.username - password = auth.password - headers = {"WWW-Authenticate": "Basic"} - if not (email and password): - message = f"User login failed, missing e-mail or password field." - self.logger.error(f"{message}") - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, headers=headers, detail=message) - try: - db_user_account = await user_auth(email=email, password=password) - except AuthenticationError as error: - self.logger.error(f"{error}") - raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, headers=headers, detail=str(error)) - return PYUserAction.from_db_user_account(action="Successfully logged!", db_user_account=db_user_account) + return await user_auth_with_handling(logger=self.logger, auth=auth) async def user_register( self, email: str, password: str, institution_id: str, account_type: AccountType = AccountType.USER, @@ -80,7 +67,7 @@ async def user_register( ) -> PYUserAction: """ Used for registration. - There are 3 account types: + There are 4 account types: 1) ADMIN 2) USER 3) HARVESTER @@ -101,7 +88,7 @@ async def user_register( return PYUserAction.from_db_user_account(action=action, db_user_account=db_user_account) async def user_processing_stats(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())): - await self.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_user_account = await db_get_user_account_with_email(email=auth.username) db_processing_stats = await db_get_processing_stats(db_user_account.user_id) return db_processing_stats @@ -113,7 +100,7 @@ async def user_workflow_jobs( """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await self.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_user_account = await db_get_user_account_with_email(email=auth.username) db_workflow_jobs = await db_get_all_workflow_jobs_by_user( user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) @@ -131,7 +118,7 @@ async def user_workspaces( """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await self.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_user_account = await db_get_user_account_with_email(email=auth.username) db_workspaces = await db_get_all_workspaces_by_user( user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) @@ -144,7 +131,7 @@ async def user_workflows( """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await self.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_user_account = await db_get_user_account_with_email(email=auth.username) db_workflows = await db_get_all_workflows_by_user( user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/user_utils.py b/src/server/operandi_server/routers/user_utils.py index cdacf3dc..054c4642 100644 --- a/src/server/operandi_server/routers/user_utils.py +++ b/src/server/operandi_server/routers/user_utils.py @@ -1,4 +1,5 @@ -from fastapi import HTTPException, status +from fastapi import Depends, HTTPException, status +from fastapi.security import HTTPBasic, HTTPBasicCredentials from hashlib import sha512 from random import random from typing import Tuple @@ -7,8 +8,7 @@ from operandi_utils.database import ( db_create_processing_stats, db_create_user_account, db_get_user_account, db_get_user_account_with_email, DBUserAccount) -from operandi_server.exceptions import AuthenticationError - +from operandi_server.models import PYUserAction async def create_user_if_not_available( @@ -22,23 +22,40 @@ async def create_user_if_not_available( logger, email=username, password=password, account_type=account_type, approved_user=approved_user, details=details, institution_id=institution_id) -async def user_auth(email: str, password: str) -> DBUserAccount: +async def user_auth_with_handling( + logger, auth: HTTPBasicCredentials = Depends(HTTPBasic()), headers=None +) -> PYUserAction: + email = auth.username + password = auth.password + if headers is None: + headers = {"WWW-Authenticate": "Basic"} + if not (email and password): + message = f"User login failed, missing e-mail or password field." + logger.error(f"{message}") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, headers=headers, detail=message) try: - db_user = await db_get_user_account_with_email(email=email) + db_user_account = await db_get_user_account_with_email(email=email) except RuntimeError: - raise AuthenticationError(f"Not found user account for email: {email}") - password_status = validate_password(plain_password=password, encrypted_password=db_user.encrypted_pass) + message = f"Not found user account for email: {email}" + logger.error(f"{message}") + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, headers=headers, detail=message) + if not db_user_account.approved_user: + message = f"The account has not been approved by the admin yet." + logger.error(f"{message}") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, headers=headers, detail=message) + password_status = validate_password(plain_password=password, encrypted_password=db_user_account.encrypted_pass) if not password_status: - raise AuthenticationError(f"Wrong credentials for email: {email}") - if not db_user.approved_user: - raise AuthenticationError(f"The account has not been approved by the admin yet.") - return db_user + message = f"Wrong credentials for email: {email}" + logger.error(f"{message}") + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, headers=headers, detail=message) + return PYUserAction.from_db_user_account(action="Successfully logged!", db_user_account=db_user_account) async def user_register_with_handling( logger, email: str, password: str, account_type: AccountType, institution_id: str, approved_user: bool = False, - details: str = "User Account" -): - headers = {"WWW-Authenticate": "Basic"} + details: str = "User Account", headers=None +) -> DBUserAccount: + if headers is None: + headers = {"WWW-Authenticate": "Basic"} if account_type not in AccountType: message = f"Wrong account type. Must be one of: {AccountType}" logger.error(f"{message}") diff --git a/src/server/operandi_server/routers/workflow.py b/src/server/operandi_server/routers/workflow.py index f06ca741..8afc72f3 100644 --- a/src/server/operandi_server/routers/workflow.py +++ b/src/server/operandi_server/routers/workflow.py @@ -34,13 +34,12 @@ validate_oton_with_handling, nf_script_executable_steps_with_handling ) from .workspace_utils import check_if_file_group_exists_with_handling, get_db_workspace_with_handling -from .user import RouterUser +from .user_utils import user_auth_with_handling class RouterWorkflow: def __init__(self): self.logger = getLogger("operandi_server.routers.workflow") - self.user_authenticator = RouterUser() # The workflows available to all users by default self.production_workflows = [] @@ -190,7 +189,7 @@ async def list_workflows(self, auth: HTTPBasicCredentials = Depends(HTTPBasic()) Curl equivalent: `curl SERVER_ADDR/workflow` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) workflows = get_all_resources_url(SERVER_WORKFLOWS_ROUTER) response = [] for workflow in workflows: @@ -206,7 +205,7 @@ async def download_workflow_script( Curl equivalent: `curl -X GET SERVER_ADDR/workflow/{workflow_id} -H "accept: text/vnd.ocrd.workflow" -o foo.nf` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_workflow = await get_db_workflow_with_handling(self.logger, workflow_id=workflow_id) return FileResponse( path=db_workflow.workflow_script_path, @@ -222,7 +221,7 @@ async def upload_workflow_script( Curl equivalent: `curl -X POST SERVER_ADDR/workflow -F nextflow_script=example.nf` """ - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) workflow_id, workflow_dir = create_resource_dir(SERVER_WORKFLOWS_ROUTER, resource_id=None) nf_script_dest = join(workflow_dir, nextflow_script.filename) try: @@ -247,7 +246,7 @@ async def update_workflow_script( Curl equivalent: `curl -X PUT SERVER_ADDR/workflow/{workflow_id} -F nextflow_script=example.nf` """ - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) if workflow_id in self.production_workflows: message = f"Production workflow cannot be replaced. Tried to replace: {workflow_id}" self.logger.error(message) @@ -282,7 +281,7 @@ async def get_workflow_job_status( Curl equivalent: `curl -X GET SERVER_ADDR/workflow/{workflow_id}/{job_id}` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_wf_job = await get_db_workflow_job_with_handling(self.logger, job_id=job_id, check_local_existence=True) workspace_id = db_wf_job.workspace_id @@ -316,7 +315,7 @@ async def download_workflow_job_logs( Curl equivalent: `curl -X GET SERVER_ADDR/workflow/{workflow_id}/logs -H "accept: application/vnd.zip" -o foo.zip` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) await self._push_status_request_to_rabbitmq(job_id=job_id) db_wf_job = await get_db_workflow_job_with_handling(self.logger, job_id=job_id, check_local_existence=True) @@ -340,7 +339,7 @@ async def download_workflow_job_logs( async def download_workflow_job_hpc_log( self, workflow_id: str, job_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())): - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) await self._push_status_request_to_rabbitmq(job_id=job_id) db_wf_job = await get_db_workflow_job_with_handling(self.logger, job_id=job_id, check_local_existence=True) @@ -369,7 +368,7 @@ async def submit_to_rabbitmq_queue( self, workflow_id: str, workflow_args: WorkflowArguments, sbatch_args: SbatchArguments, details: str = "Workflow job", auth: HTTPBasicCredentials = Depends(HTTPBasic()) ): - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) user_account_type = py_user_action.account_type try: @@ -474,7 +473,7 @@ async def convert_txt_to_nextflow( self, txt_file: UploadFile, environment: str, with_mets_server: bool = True, auth: HTTPBasicCredentials = Depends(HTTPBasic()) ): - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) oton_id, oton_dir = create_resource_dir(SERVER_OTON_CONVERSIONS, resource_id=None) ocrd_process_txt = join(oton_dir, f"ocrd_process_input.txt") nf_script_dest = join(oton_dir, f"nextflow_output.nf") diff --git a/src/server/operandi_server/routers/workspace.py b/src/server/operandi_server/routers/workspace.py index db072300..afb7001f 100644 --- a/src/server/operandi_server/routers/workspace.py +++ b/src/server/operandi_server/routers/workspace.py @@ -26,13 +26,12 @@ parse_file_groups_with_handling, remove_file_groups_with_handling ) -from .user import RouterUser +from .user_utils import user_auth_with_handling class RouterWorkspace: def __init__(self): self.logger = getLogger("operandi_server.routers.workspace") - self.user_authenticator = RouterUser() self.router = APIRouter(tags=[ServerApiTag.WORKSPACE]) self.router.add_api_route( path="/import_external_workspace", @@ -82,7 +81,7 @@ async def list_workspaces(self, auth: HTTPBasicCredentials = Depends(HTTPBasic() Curl equivalent: `curl -X GET SERVER_ADDR/workspace` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) workspaces = get_all_resources_url(SERVER_WORKSPACES_ROUTER) response = [] for workspace in workspaces: @@ -100,7 +99,7 @@ async def download_workspace( Curl equivalent: `curl -X GET SERVER_ADDR/workspace/{workspace_id} -H "accept: application/vnd.ocrd+zip" -o foo.zip` """ - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) db_workspace = await get_db_workspace_with_handling( self.logger, workspace_id, check_ready=True, check_deleted=True, check_local_existence=True) @@ -124,7 +123,7 @@ async def upload_workspace_from_url( self, mets_url: str, preserve_file_grps: str, mets_basename: str = DEFAULT_METS_BASENAME, details: str = f"Workspace imported from a mets file url", auth: HTTPBasicCredentials = Depends(HTTPBasic()) ) -> WorkspaceRsrc: - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) file_grps_to_preserve = parse_file_groups_with_handling(self.logger, file_groups=preserve_file_grps) workspace_id, workspace_dir = create_resource_dir(SERVER_WORKSPACES_ROUTER) @@ -161,7 +160,7 @@ async def upload_workspace( Curl equivalent: `curl -X POST SERVER_ADDR/workspace -H "content-type: multipart/form-data" -F workspace=example_ws.ocrd.zip` """ - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) ws_id, ws_dir = create_resource_dir(SERVER_WORKSPACES_ROUTER, resource_id=None) bag_dest = f"{ws_dir}.zip" try: @@ -194,7 +193,7 @@ async def put_workspace( `curl -X PUT SERVER_ADDR/workspace/{workspace_id} -H "content-type: multipart/form-data" -F workspace=example_ws.ocrd.zip` """ - py_user_action = await self.user_authenticator.user_login(auth) + py_user_action = await user_auth_with_handling(self.logger, auth) try: await db_get_workspace(workspace_id=workspace_id) # Note: This check raises HTTP errors on RuntimeError for @@ -240,7 +239,7 @@ async def delete_workspace( Curl equivalent: `curl -X DELETE SERVER_ADDR/workspace/{workspace_id}` """ - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) await get_db_workspace_with_handling( self.logger, workspace_id, check_ready=True, check_deleted=True, check_local_existence=True) @@ -259,7 +258,7 @@ async def remove_file_group_from_workspace( self, workspace_id: str, remove_file_grps: str, recursive: bool = True, force: bool = True, auth: HTTPBasicCredentials = Depends(HTTPBasic()) ) -> WorkspaceRsrc: - await self.user_authenticator.user_login(auth) + await user_auth_with_handling(self.logger, auth) db_workspace = await get_db_workspace_with_handling( self.logger, workspace_id, check_ready=True, check_deleted=True, check_local_existence=True ) From bb1dccfd0fdb9e0a964ab22ed7da456ac0ed70af Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 15:35:47 +0100 Subject: [PATCH 17/29] refactor: NF workflows without version --- .../operandi_utils/hpc/nextflow_workflows/default_workflow.nf | 2 +- .../hpc/nextflow_workflows/default_workflow_with_MS.nf | 2 +- .../operandi_utils/hpc/nextflow_workflows/odem_workflow.nf | 2 +- .../hpc/nextflow_workflows/odem_workflow_with_MS.nf | 2 +- src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf | 2 +- .../hpc/nextflow_workflows/sbb_workflow_with_MS.nf | 2 +- .../operandi_utils/hpc/nextflow_workflows/template_workflow.nf | 2 +- .../hpc/nextflow_workflows/template_workflow_with_MS.nf | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf index 464b2c69..0b63c78e 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf index 6be5d920..dbdf3455 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf index a22b221a..2a5176f7 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf index 4b7b94d8..94277800 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/odem_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf index 0dcc63a0..4a69f925 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf index 840fae0e..822670a3 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/sbb_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf index 859958d0..ad74b0f0 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf index ddbe7319..adde0171 100755 --- a/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf +++ b/src/utils/operandi_utils/hpc/nextflow_workflows/template_workflow_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.2 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" From bdf4e819ba8f153cecf48b12c08f7aefe86a232b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 15:54:11 +0100 Subject: [PATCH 18/29] refactor: password utils --- .../operandi_server/routers/password_utils.py | 19 ++++++++++++++ .../operandi_server/routers/user_utils.py | 26 ++----------------- 2 files changed, 21 insertions(+), 24 deletions(-) create mode 100644 src/server/operandi_server/routers/password_utils.py diff --git a/src/server/operandi_server/routers/password_utils.py b/src/server/operandi_server/routers/password_utils.py new file mode 100644 index 00000000..e7168efa --- /dev/null +++ b/src/server/operandi_server/routers/password_utils.py @@ -0,0 +1,19 @@ +from hashlib import sha512 +from random import random +from typing import Tuple + +def get_random_salt() -> str: + return sha512(f"{hash(str(random()))}".encode("utf-8")).hexdigest()[:8] + +def get_hex_digest(salt: str, plain_password: str): + return sha512(f"{salt}{plain_password}".encode("utf-8")).hexdigest() + +def encrypt_password(plain_password: str) -> Tuple[str, str]: + salt = get_random_salt() + hashed_password = get_hex_digest(salt, plain_password) + encrypted_password = f"{salt}${hashed_password}" + return salt, encrypted_password + +def validate_password(plain_password: str, encrypted_password: str) -> bool: + salt, hashed_password = encrypted_password.split(sep='$', maxsplit=1) + return hashed_password == get_hex_digest(salt, plain_password) diff --git a/src/server/operandi_server/routers/user_utils.py b/src/server/operandi_server/routers/user_utils.py index 054c4642..6a624b7b 100644 --- a/src/server/operandi_server/routers/user_utils.py +++ b/src/server/operandi_server/routers/user_utils.py @@ -1,14 +1,12 @@ from fastapi import Depends, HTTPException, status from fastapi.security import HTTPBasic, HTTPBasicCredentials -from hashlib import sha512 -from random import random -from typing import Tuple from operandi_utils.constants import AccountType from operandi_utils.database import ( db_create_processing_stats, db_create_user_account, db_get_user_account, db_get_user_account_with_email, DBUserAccount) from operandi_server.models import PYUserAction +from .password_utils import encrypt_password, validate_password async def create_user_if_not_available( @@ -60,10 +58,10 @@ async def user_register_with_handling( message = f"Wrong account type. Must be one of: {AccountType}" logger.error(f"{message}") raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, headers=headers, detail=message) - salt, encrypted_password = encrypt_password(password) try: await db_get_user_account(email) except RuntimeError: + salt, encrypted_password = encrypt_password(password) # No user existing with the provided e-mail, register db_user_account = await db_create_user_account( institution_id=institution_id, email=email, encrypted_pass=encrypted_password, salt=salt, @@ -73,23 +71,3 @@ async def user_register_with_handling( message = f"Another user is already registered with email: {email}" logger.error(f"{message}") raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, headers=headers, detail=message) - - -def encrypt_password(plain_password: str) -> Tuple[str, str]: - salt = get_random_salt() - hashed_password = get_hex_digest(salt, plain_password) - encrypted_password = f"{salt}${hashed_password}" - return salt, encrypted_password - - -def get_hex_digest(salt: str, plain_password: str): - return sha512(f"{salt}{plain_password}".encode("utf-8")).hexdigest() - - -def get_random_salt() -> str: - return sha512(f"{hash(str(random()))}".encode("utf-8")).hexdigest()[:8] - - -def validate_password(plain_password: str, encrypted_password: str) -> bool: - salt, hashed_password = encrypted_password.split(sep='$', maxsplit=1) - return hashed_password == get_hex_digest(salt, plain_password) From 2b7093af642ff4a8ffd1b420808c9b72d957339b Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 15:58:45 +0100 Subject: [PATCH 19/29] refactor: match same name of the same method --- src/server/operandi_server/routers/admin_panel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 0a21a4f3..fd1ddef9 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -25,7 +25,7 @@ def __init__(self): ) self.router.add_api_route( path="/admin/processing_stats/{user_id}", - endpoint=self.get_processing_stats_for_user, methods=["GET"], status_code=status.HTTP_200_OK, + endpoint=self.user_processing_stats, methods=["GET"], status_code=status.HTTP_200_OK, summary="Get processing stats for a specific user by user_id" ) self.router.add_api_route( @@ -85,7 +85,7 @@ async def get_users(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())): users = await db_get_all_user_accounts() return [PYUserInfo.from_db_user_account(user) for user in users] - async def get_processing_stats_for_user(self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())): + async def user_processing_stats(self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())): await self.auth_admin_with_handling(auth) try: db_processing_stats = await db_get_processing_stats(user_id) From c65c158067b25e017729672147d2249109d4cc91 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 16:15:56 +0100 Subject: [PATCH 20/29] refactor: remove duplication of user workflows --- src/server/operandi_server/routers/admin_panel.py | 8 ++++---- src/server/operandi_server/routers/user.py | 12 +++++------- src/server/operandi_server/routers/workflow_utils.py | 12 ++++++++++-- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index fd1ddef9..e6b74c63 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -8,10 +8,11 @@ from operandi_utils.constants import AccountType, ServerApiTag from operandi_utils.database import ( db_get_all_user_accounts, db_get_processing_stats, db_get_all_workflow_jobs_by_user, - db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user, db_get_all_workflows_by_user + db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user ) from operandi_utils.utils import send_bag_to_ola_hd from .user_utils import user_auth_with_handling +from .workflow_utils import get_workflows_of_user from .workspace_utils import create_workspace_bag, get_db_workspace_with_handling, validate_bag_with_handling class RouterAdminPanel: @@ -130,10 +131,9 @@ async def user_workspaces( async def user_workflows( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkflowRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - db_workflows = await db_get_all_workflows_by_user(user_id=user_id, start_date=start_date, end_date=end_date) - return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] + return await get_workflows_of_user(user_id=user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index 1c4f8b96..e0ac8ad7 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -7,10 +7,11 @@ from operandi_utils.constants import AccountType, ServerApiTag from operandi_utils.database import ( db_get_processing_stats, db_get_all_workflow_jobs_by_user, db_get_user_account_with_email, - db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user, db_get_all_workflows_by_user + db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user ) from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics +from .workflow_utils import get_workflows_of_user from .user_utils import user_auth_with_handling, user_register_with_handling @@ -127,12 +128,9 @@ async def user_workspaces( async def user_workflows( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkflowRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await user_auth_with_handling(self.logger, auth) - db_user_account = await db_get_user_account_with_email(email=auth.username) - db_workflows = await db_get_all_workflows_by_user( - user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) - return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] + py_user_action = await user_auth_with_handling(self.logger, auth) + return await get_workflows_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/workflow_utils.py b/src/server/operandi_server/routers/workflow_utils.py index 591798b3..84e9ca23 100644 --- a/src/server/operandi_server/routers/workflow_utils.py +++ b/src/server/operandi_server/routers/workflow_utils.py @@ -1,11 +1,13 @@ +from datetime import datetime from fastapi import HTTPException, status from pathlib import Path -from typing import List +from typing import List, Optional -from operandi_utils.database import db_get_workflow, db_get_workflow_job +from operandi_utils.database import db_get_workflow, db_get_workflow_job, db_get_all_workflows_by_user from operandi_utils.database.models import DBWorkflow, DBWorkflowJob from operandi_utils.oton import OTONConverter, OCRDValidator from operandi_utils.oton.constants import PARAMS_KEY_METS_SOCKET_PATH +from operandi_server.models import WorkflowRsrc async def get_db_workflow_with_handling( @@ -112,3 +114,9 @@ async def convert_oton_with_handling( message = "Failed to convert ocrd process workflow to nextflow workflow" logger.error(f"{message}, error: {error}") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message) + +async def get_workflows_of_user( + user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None +) -> List[WorkflowRsrc]: + db_workflows = await db_get_all_workflows_by_user(user_id=user_id, start_date=start_date, end_date=end_date) + return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] From 8ce6701225418a81de92d52317de814a021589e5 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 16:24:34 +0100 Subject: [PATCH 21/29] refactor: remove duplication of user workspaces --- src/server/operandi_server/routers/admin_panel.py | 11 ++++++----- src/server/operandi_server/routers/user.py | 14 ++++++-------- .../operandi_server/routers/workspace_utils.py | 12 ++++++++++-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index e6b74c63..b9e92436 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -8,12 +8,14 @@ from operandi_utils.constants import AccountType, ServerApiTag from operandi_utils.database import ( db_get_all_user_accounts, db_get_processing_stats, db_get_all_workflow_jobs_by_user, - db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user + db_get_workflow, db_get_workspace ) from operandi_utils.utils import send_bag_to_ola_hd from .user_utils import user_auth_with_handling from .workflow_utils import get_workflows_of_user -from .workspace_utils import create_workspace_bag, get_db_workspace_with_handling, validate_bag_with_handling +from .workspace_utils import ( + create_workspace_bag, get_workspaces_of_user, get_db_workspace_with_handling, validate_bag_with_handling +) class RouterAdminPanel: def __init__(self): @@ -120,13 +122,12 @@ async def user_workflow_jobs( async def user_workspaces( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkspaceRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - db_workspaces = await db_get_all_workspaces_by_user(user_id=user_id, start_date=start_date, end_date=end_date) - return [WorkspaceRsrc.from_db_workspace(db_workspace) for db_workspace in db_workspaces] + return await get_workspaces_of_user(user_id=user_id, start_date=start_date, end_date=end_date) async def user_workflows( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index e0ac8ad7..a0ac3bed 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -7,11 +7,12 @@ from operandi_utils.constants import AccountType, ServerApiTag from operandi_utils.database import ( db_get_processing_stats, db_get_all_workflow_jobs_by_user, db_get_user_account_with_email, - db_get_workflow, db_get_workspace, db_get_all_workspaces_by_user + db_get_workflow, db_get_workspace ) from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics from .workflow_utils import get_workflows_of_user +from .workspace_utils import get_workspaces_of_user from .user_utils import user_auth_with_handling, user_register_with_handling @@ -97,7 +98,7 @@ async def user_processing_stats(self, auth: HTTPBasicCredentials = Depends(HTTPB async def user_workflow_jobs( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkflowJobRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ @@ -115,15 +116,12 @@ async def user_workflow_jobs( async def user_workspaces( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkspaceRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await user_auth_with_handling(self.logger, auth) - db_user_account = await db_get_user_account_with_email(email=auth.username) - db_workspaces = await db_get_all_workspaces_by_user( - user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) - return [WorkspaceRsrc.from_db_workspace(db_workspace) for db_workspace in db_workspaces] + py_user_action = await user_auth_with_handling(self.logger, auth) + return await get_workspaces_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) async def user_workflows( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/workspace_utils.py b/src/server/operandi_server/routers/workspace_utils.py index f3ceb85f..79cee565 100644 --- a/src/server/operandi_server/routers/workspace_utils.py +++ b/src/server/operandi_server/routers/workspace_utils.py @@ -1,9 +1,10 @@ import bagit +from datetime import datetime from fastapi import HTTPException, status from os.path import join from pathlib import Path from tempfile import NamedTemporaryFile -from typing import List, Union +from typing import List, Optional, Union from zipfile import ZipFile from ocrd import Resolver @@ -14,8 +15,9 @@ from operandi_server.constants import DEFAULT_FILE_GRP, DEFAULT_METS_BASENAME from operandi_server.exceptions import WorkspaceNotValidException from operandi_utils.constants import StateWorkspace -from operandi_utils.database import db_get_workspace +from operandi_utils.database import db_get_workspace, db_get_all_workspaces_by_user from operandi_utils.database.models import DBWorkspace +from operandi_server.models import WorkspaceRsrc def get_ocrd_workspace_physical_pages(mets_path: str) -> List[str]: @@ -216,3 +218,9 @@ def extract_file_groups_from_db_model_with_handling(logger, db_workspace) -> Lis def check_if_file_group_exists_with_handling(logger, db_workspace, file_group: str) -> bool: file_groups = extract_file_groups_from_db_model_with_handling(logger, db_workspace) return file_group in file_groups + +async def get_workspaces_of_user( + user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None +) -> List[WorkspaceRsrc]: + db_workspaces = await db_get_all_workspaces_by_user(user_id=user_id, start_date=start_date, end_date=end_date) + return [WorkspaceRsrc.from_db_workspace(db_workspace) for db_workspace in db_workspaces] From a4cdcf64bd17a667614d9efde1c4fb54a62f5d45 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 16:34:50 +0100 Subject: [PATCH 22/29] remove duplication of user workflow jobs --- .../operandi_server/routers/admin_panel.py | 18 ++++-------------- src/server/operandi_server/routers/user.py | 19 ++++--------------- .../operandi_server/routers/workflow_utils.py | 18 ++++++++++++++++-- 3 files changed, 24 insertions(+), 31 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index b9e92436..079f8e79 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -6,13 +6,10 @@ from operandi_server.models import PYUserInfo, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.constants import AccountType, ServerApiTag -from operandi_utils.database import ( - db_get_all_user_accounts, db_get_processing_stats, db_get_all_workflow_jobs_by_user, - db_get_workflow, db_get_workspace -) +from operandi_utils.database import db_get_all_user_accounts, db_get_processing_stats from operandi_utils.utils import send_bag_to_ola_hd from .user_utils import user_auth_with_handling -from .workflow_utils import get_workflows_of_user +from .workflow_utils import get_workflows_of_user, get_workflow_jobs_of_user from .workspace_utils import ( create_workspace_bag, get_workspaces_of_user, get_db_workspace_with_handling, validate_bag_with_handling ) @@ -105,19 +102,12 @@ async def user_processing_stats(self, user_id: str, auth: HTTPBasicCredentials = async def user_workflow_jobs( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), start_date: Optional[datetime] = None, end_date: Optional[datetime] = None - ) -> List: + ) -> List[WorkflowJobRsrc]: """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - db_workflow_jobs = await db_get_all_workflow_jobs_by_user( - user_id=user_id, start_date=start_date, end_date=end_date) - response = [] - for db_workflow_job in db_workflow_jobs: - db_workflow = await db_get_workflow(db_workflow_job.workflow_id) - db_workspace = await db_get_workspace(db_workflow_job.workspace_id) - response.append(WorkflowJobRsrc.from_db_workflow_job(db_workflow_job, db_workflow, db_workspace)) - return response + return await get_workflow_jobs_of_user(user_id=user_id, start_date=start_date, end_date=end_date) async def user_workspaces( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index a0ac3bed..be6fc938 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -5,13 +5,10 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials from operandi_utils.constants import AccountType, ServerApiTag -from operandi_utils.database import ( - db_get_processing_stats, db_get_all_workflow_jobs_by_user, db_get_user_account_with_email, - db_get_workflow, db_get_workspace -) +from operandi_utils.database import db_get_processing_stats, db_get_user_account_with_email from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics -from .workflow_utils import get_workflows_of_user +from .workflow_utils import get_workflows_of_user, get_workflow_jobs_of_user from .workspace_utils import get_workspaces_of_user from .user_utils import user_auth_with_handling, user_register_with_handling @@ -102,16 +99,8 @@ async def user_workflow_jobs( """ The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ - await user_auth_with_handling(self.logger, auth) - db_user_account = await db_get_user_account_with_email(email=auth.username) - db_workflow_jobs = await db_get_all_workflow_jobs_by_user( - user_id=db_user_account.user_id, start_date=start_date, end_date=end_date) - response = [] - for db_workflow_job in db_workflow_jobs: - db_workflow = await db_get_workflow(db_workflow_job.workflow_id) - db_workspace = await db_get_workspace(db_workflow_job.workspace_id) - response.append(WorkflowJobRsrc.from_db_workflow_job(db_workflow_job, db_workflow, db_workspace)) - return response + py_user_action = await user_auth_with_handling(self.logger, auth) + return await get_workflow_jobs_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) async def user_workspaces( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/workflow_utils.py b/src/server/operandi_server/routers/workflow_utils.py index 84e9ca23..9f814dba 100644 --- a/src/server/operandi_server/routers/workflow_utils.py +++ b/src/server/operandi_server/routers/workflow_utils.py @@ -3,11 +3,14 @@ from pathlib import Path from typing import List, Optional -from operandi_utils.database import db_get_workflow, db_get_workflow_job, db_get_all_workflows_by_user +from operandi_utils.database import ( + db_get_all_workflows_by_user, db_get_all_workflow_jobs_by_user, + db_get_workflow, db_get_workflow_job, db_get_workspace +) from operandi_utils.database.models import DBWorkflow, DBWorkflowJob from operandi_utils.oton import OTONConverter, OCRDValidator from operandi_utils.oton.constants import PARAMS_KEY_METS_SOCKET_PATH -from operandi_server.models import WorkflowRsrc +from operandi_server.models import WorkflowRsrc, WorkflowJobRsrc async def get_db_workflow_with_handling( @@ -120,3 +123,14 @@ async def get_workflows_of_user( ) -> List[WorkflowRsrc]: db_workflows = await db_get_all_workflows_by_user(user_id=user_id, start_date=start_date, end_date=end_date) return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] + +async def get_workflow_jobs_of_user( + user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None +) -> List[WorkflowJobRsrc]: + db_workflow_jobs = await db_get_all_workflow_jobs_by_user(user_id=user_id, start_date=start_date, end_date=end_date) + response = [] + for db_workflow_job in db_workflow_jobs: + db_workflow = await db_get_workflow(db_workflow_job.workflow_id) + db_workspace = await db_get_workspace(db_workflow_job.workspace_id) + response.append(WorkflowJobRsrc.from_db_workflow_job(db_workflow_job, db_workflow, db_workspace)) + return response From 183cb6864558033e68eeb7fda28e16fd9d774d33 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 16:40:19 +0100 Subject: [PATCH 23/29] refactor: method names to respect prefix --- src/server/operandi_server/routers/admin_panel.py | 10 +++++----- src/server/operandi_server/routers/user.py | 10 +++++----- src/server/operandi_server/routers/workflow_utils.py | 4 ++-- src/server/operandi_server/routers/workspace_utils.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 079f8e79..4a50a59a 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -9,9 +9,9 @@ from operandi_utils.database import db_get_all_user_accounts, db_get_processing_stats from operandi_utils.utils import send_bag_to_ola_hd from .user_utils import user_auth_with_handling -from .workflow_utils import get_workflows_of_user, get_workflow_jobs_of_user +from .workflow_utils import get_user_workflows, get_user_workflow_jobs from .workspace_utils import ( - create_workspace_bag, get_workspaces_of_user, get_db_workspace_with_handling, validate_bag_with_handling + create_workspace_bag, get_user_workspaces, get_db_workspace_with_handling, validate_bag_with_handling ) class RouterAdminPanel: @@ -107,7 +107,7 @@ async def user_workflow_jobs( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - return await get_workflow_jobs_of_user(user_id=user_id, start_date=start_date, end_date=end_date) + return await get_user_workflow_jobs(user_id=user_id, start_date=start_date, end_date=end_date) async def user_workspaces( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), @@ -117,7 +117,7 @@ async def user_workspaces( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - return await get_workspaces_of_user(user_id=user_id, start_date=start_date, end_date=end_date) + return await get_user_workspaces(user_id=user_id, start_date=start_date, end_date=end_date) async def user_workflows( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), @@ -127,4 +127,4 @@ async def user_workflows( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - return await get_workflows_of_user(user_id=user_id, start_date=start_date, end_date=end_date) + return await get_user_workflows(user_id=user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index be6fc938..86f9931f 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -8,8 +8,8 @@ from operandi_utils.database import db_get_processing_stats, db_get_user_account_with_email from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics -from .workflow_utils import get_workflows_of_user, get_workflow_jobs_of_user -from .workspace_utils import get_workspaces_of_user +from .workflow_utils import get_user_workflows, get_user_workflow_jobs +from .workspace_utils import get_user_workspaces from .user_utils import user_auth_with_handling, user_register_with_handling @@ -100,7 +100,7 @@ async def user_workflow_jobs( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ py_user_action = await user_auth_with_handling(self.logger, auth) - return await get_workflow_jobs_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) + return await get_user_workflow_jobs(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) async def user_workspaces( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), @@ -110,7 +110,7 @@ async def user_workspaces( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ py_user_action = await user_auth_with_handling(self.logger, auth) - return await get_workspaces_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) + return await get_user_workspaces(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) async def user_workflows( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), @@ -120,4 +120,4 @@ async def user_workflows( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ py_user_action = await user_auth_with_handling(self.logger, auth) - return await get_workflows_of_user(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) + return await get_user_workflows(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/workflow_utils.py b/src/server/operandi_server/routers/workflow_utils.py index 9f814dba..700db6dc 100644 --- a/src/server/operandi_server/routers/workflow_utils.py +++ b/src/server/operandi_server/routers/workflow_utils.py @@ -118,13 +118,13 @@ async def convert_oton_with_handling( logger.error(f"{message}, error: {error}") raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=message) -async def get_workflows_of_user( +async def get_user_workflows( user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None ) -> List[WorkflowRsrc]: db_workflows = await db_get_all_workflows_by_user(user_id=user_id, start_date=start_date, end_date=end_date) return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] -async def get_workflow_jobs_of_user( +async def get_user_workflow_jobs( user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None ) -> List[WorkflowJobRsrc]: db_workflow_jobs = await db_get_all_workflow_jobs_by_user(user_id=user_id, start_date=start_date, end_date=end_date) diff --git a/src/server/operandi_server/routers/workspace_utils.py b/src/server/operandi_server/routers/workspace_utils.py index 79cee565..effcf0a6 100644 --- a/src/server/operandi_server/routers/workspace_utils.py +++ b/src/server/operandi_server/routers/workspace_utils.py @@ -219,7 +219,7 @@ def check_if_file_group_exists_with_handling(logger, db_workspace, file_group: s file_groups = extract_file_groups_from_db_model_with_handling(logger, db_workspace) return file_group in file_groups -async def get_workspaces_of_user( +async def get_user_workspaces( user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None ) -> List[WorkspaceRsrc]: db_workspaces = await db_get_all_workspaces_by_user(user_id=user_id, start_date=start_date, end_date=end_date) From 5505f8a52b012ce4edd5b2252f95f6747c05c67c Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:00:06 +0100 Subject: [PATCH 24/29] refactor: get all users --- .../operandi_server/routers/admin_panel.py | 20 ++++--------------- src/server/operandi_server/routers/user.py | 9 +++------ .../operandi_server/routers/user_utils.py | 20 ++++++++++++++++--- src/utils/operandi_utils/database/__init__.py | 3 ++- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 4a50a59a..49743596 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -6,9 +6,8 @@ from operandi_server.models import PYUserInfo, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.constants import AccountType, ServerApiTag -from operandi_utils.database import db_get_all_user_accounts, db_get_processing_stats from operandi_utils.utils import send_bag_to_ola_hd -from .user_utils import user_auth_with_handling +from .user_utils import get_user_accounts, get_user_processing_stats_with_handling, user_auth_with_handling from .workflow_utils import get_user_workflows, get_user_workflow_jobs from .workspace_utils import ( create_workspace_bag, get_user_workspaces, get_db_workspace_with_handling, validate_bag_with_handling @@ -80,24 +79,13 @@ async def push_to_ola_hd(self, workspace_id: str, auth: HTTPBasicCredentials = D } return response_message - async def get_users(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())): + async def get_users(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> List[PYUserInfo]: await self.auth_admin_with_handling(auth) - users = await db_get_all_user_accounts() - return [PYUserInfo.from_db_user_account(user) for user in users] + return await get_user_accounts() async def user_processing_stats(self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())): await self.auth_admin_with_handling(auth) - try: - db_processing_stats = await db_get_processing_stats(user_id) - if not db_processing_stats: - message = f"Processing stats not found for the user_id: {user_id}" - self.logger.error(message) - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=message) - except Exception as error: - message = f"Failed to fetch processing stats for user_id: {user_id}, error: {error}" - self.logger.error(message) - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=message) - return db_processing_stats + return await get_user_processing_stats_with_handling(self.logger, user_id=user_id) async def user_workflow_jobs( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index 86f9931f..87cb93f9 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -5,12 +5,11 @@ from fastapi.security import HTTPBasic, HTTPBasicCredentials from operandi_utils.constants import AccountType, ServerApiTag -from operandi_utils.database import db_get_processing_stats, db_get_user_account_with_email from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics from .workflow_utils import get_user_workflows, get_user_workflow_jobs from .workspace_utils import get_user_workspaces -from .user_utils import user_auth_with_handling, user_register_with_handling +from .user_utils import get_user_processing_stats_with_handling, user_auth_with_handling, user_register_with_handling class RouterUser: @@ -87,10 +86,8 @@ async def user_register( return PYUserAction.from_db_user_account(action=action, db_user_account=db_user_account) async def user_processing_stats(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())): - await user_auth_with_handling(self.logger, auth) - db_user_account = await db_get_user_account_with_email(email=auth.username) - db_processing_stats = await db_get_processing_stats(db_user_account.user_id) - return db_processing_stats + py_user_action = await user_auth_with_handling(self.logger, auth) + return await get_user_processing_stats_with_handling(self.logger, user_id=py_user_action.user_id) async def user_workflow_jobs( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/user_utils.py b/src/server/operandi_server/routers/user_utils.py index 6a624b7b..f3b70be0 100644 --- a/src/server/operandi_server/routers/user_utils.py +++ b/src/server/operandi_server/routers/user_utils.py @@ -1,11 +1,12 @@ from fastapi import Depends, HTTPException, status from fastapi.security import HTTPBasic, HTTPBasicCredentials +from typing import List from operandi_utils.constants import AccountType from operandi_utils.database import ( - db_create_processing_stats, db_create_user_account, db_get_user_account, db_get_user_account_with_email, - DBUserAccount) -from operandi_server.models import PYUserAction + db_create_processing_stats, db_create_user_account, db_get_all_user_accounts, db_get_user_account, + db_get_user_account_with_email, db_get_processing_stats, DBProcessingStatistics, DBUserAccount) +from operandi_server.models import PYUserAction, PYUserInfo from .password_utils import encrypt_password, validate_password @@ -71,3 +72,16 @@ async def user_register_with_handling( message = f"Another user is already registered with email: {email}" logger.error(f"{message}") raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, headers=headers, detail=message) + +async def get_user_processing_stats_with_handling(logger, user_id: str) -> DBProcessingStatistics: + try: + db_processing_stats = await db_get_processing_stats(user_id=user_id) + except RuntimeError as error: + message = f"Processing stats not found for the user_id: {user_id}" + logger.error(f"{message}, error: {error}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=message) + return db_processing_stats + +async def get_user_accounts() -> List[PYUserInfo]: + users = await db_get_all_user_accounts() + return [PYUserInfo.from_db_user_account(user) for user in users] diff --git a/src/utils/operandi_utils/database/__init__.py b/src/utils/operandi_utils/database/__init__.py index 27a22aab..d6e9c842 100644 --- a/src/utils/operandi_utils/database/__init__.py +++ b/src/utils/operandi_utils/database/__init__.py @@ -1,5 +1,6 @@ __all__ = [ "DBHPCSlurmJob", + "DBProcessingStatistics", "DBUserAccount", "DBWorkflow", "DBWorkflowJob", @@ -56,7 +57,7 @@ ] from .base import db_initiate_database, sync_db_initiate_database -from .models import DBHPCSlurmJob, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace +from .models import DBHPCSlurmJob, DBProcessingStatistics, DBUserAccount, DBWorkflow, DBWorkflowJob, DBWorkspace from .db_hpc_slurm_job import ( db_create_hpc_slurm_job, db_get_hpc_slurm_job, From b2d3b0f6745836707405abbea0cac9634e1ebd95 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:26:47 +0100 Subject: [PATCH 25/29] refactor: push status request --- .../operandi_server/routers/workflow.py | 29 +++++-------------- .../operandi_server/routers/workflow_utils.py | 21 ++++++++++++++ 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/server/operandi_server/routers/workflow.py b/src/server/operandi_server/routers/workflow.py index 8afc72f3..7ed76e8e 100644 --- a/src/server/operandi_server/routers/workflow.py +++ b/src/server/operandi_server/routers/workflow.py @@ -10,7 +10,6 @@ from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status, UploadFile from fastapi.responses import FileResponse from fastapi.security import HTTPBasic, HTTPBasicCredentials -from starlette.status import HTTP_404_NOT_FOUND from operandi_utils import get_nf_wfs_dir, get_ocrd_process_wfs_dir from operandi_utils.constants import AccountType, ServerApiTag, StateJob, StateWorkspace @@ -18,8 +17,7 @@ db_create_workflow, db_create_workflow_job, db_get_hpc_slurm_job, db_get_workflow, db_update_workspace, db_increase_processing_stats_with_handling) from operandi_utils.oton import OTONConverter -from operandi_utils.rabbitmq import ( - get_connection_publisher, RABBITMQ_QUEUE_JOB_STATUSES, RABBITMQ_QUEUE_HARVESTER, RABBITMQ_QUEUE_USERS) +from operandi_utils.rabbitmq import get_connection_publisher, RABBITMQ_QUEUE_HARVESTER, RABBITMQ_QUEUE_USERS from operandi_server.constants import ( SERVER_OTON_CONVERSIONS, SERVER_WORKFLOWS_ROUTER, SERVER_WORKFLOW_JOBS_ROUTER, SERVER_WORKSPACES_ROUTER) from operandi_server.files_manager import ( @@ -30,8 +28,10 @@ convert_oton_with_handling, get_db_workflow_job_with_handling, get_db_workflow_with_handling, + nf_script_executable_steps_with_handling, nf_script_uses_mets_server_with_handling, - validate_oton_with_handling, nf_script_executable_steps_with_handling + push_status_request_to_rabbitmq, + validate_oton_with_handling ) from .workspace_utils import check_if_file_group_exists_with_handling, get_db_workspace_with_handling from .user_utils import user_auth_with_handling @@ -119,19 +119,6 @@ def __del__(self): if self.rmq_publisher: self.rmq_publisher.disconnect() - async def _push_status_request_to_rabbitmq(self, job_id: str): - # Create the job status message to be sent to the RabbitMQ queue - try: - job_status_message = {"job_id": f"{job_id}"} - self.logger.debug(f"Encoding the job status RabbitMQ message: {job_status_message}") - encoded_wf_message = dumps(job_status_message).encode(encoding="utf-8") - self.logger.debug(f"Pushing to the RabbitMQ queue for job statuses: {RABBITMQ_QUEUE_JOB_STATUSES}") - self.rmq_publisher.publish_to_queue(queue_name=RABBITMQ_QUEUE_JOB_STATUSES, message=encoded_wf_message) - except Exception as error: - message = "Failed to push status request to RabbitMQ" - self.logger.error(f"{message}, error: {error}") - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=message) - async def produce_production_workflows( self, ocrd_process_wf_dir: Path = get_ocrd_process_wfs_dir(), @@ -292,7 +279,7 @@ async def get_workflow_job_status( self.logger, workflow_id=workflow_id, check_deleted=False, check_local_existence=False) if db_wf_job.job_state != StateJob.FAILED and db_wf_job.job_state != StateJob.SUCCESS: - await self._push_status_request_to_rabbitmq(job_id=job_id) + await push_status_request_to_rabbitmq(self.logger, self.rmq_publisher, job_id=job_id) # TODO: Fix that by getting rid of the FileManager module try: @@ -316,11 +303,11 @@ async def download_workflow_job_logs( `curl -X GET SERVER_ADDR/workflow/{workflow_id}/logs -H "accept: application/vnd.zip" -o foo.zip` """ await user_auth_with_handling(self.logger, auth) - await self._push_status_request_to_rabbitmq(job_id=job_id) db_wf_job = await get_db_workflow_job_with_handling(self.logger, job_id=job_id, check_local_existence=True) job_state = db_wf_job.job_state if job_state != StateJob.SUCCESS and job_state != StateJob.FAILED: + await push_status_request_to_rabbitmq(self.logger, self.rmq_publisher, job_id=job_id) message = f"Cannot download logs of a job unless it succeeds or fails: {job_id}" self.logger.exception(message) raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=message) @@ -340,11 +327,11 @@ async def download_workflow_job_logs( async def download_workflow_job_hpc_log( self, workflow_id: str, job_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic())): await user_auth_with_handling(self.logger, auth) - await self._push_status_request_to_rabbitmq(job_id=job_id) db_wf_job = await get_db_workflow_job_with_handling(self.logger, job_id=job_id, check_local_existence=True) job_state = db_wf_job.job_state if job_state != StateJob.SUCCESS and job_state != StateJob.FAILED: + await push_status_request_to_rabbitmq(self.logger, self.rmq_publisher, job_id=job_id) message = f"Cannot download logs of a job unless it succeeds or fails: {job_id}" self.logger.exception(message) raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=message) @@ -361,7 +348,7 @@ async def download_workflow_job_hpc_log( slurm_job_log_path = Path(wf_job_local, slurm_job_log) if not slurm_job_log_path.exists(): message = f"No slurm job log file was found for job id: {job_id}" - raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=message) + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=message) return FileResponse(path=slurm_job_log_path, filename=slurm_job_log, media_type="application/text") async def submit_to_rabbitmq_queue( diff --git a/src/server/operandi_server/routers/workflow_utils.py b/src/server/operandi_server/routers/workflow_utils.py index 700db6dc..75cc81b5 100644 --- a/src/server/operandi_server/routers/workflow_utils.py +++ b/src/server/operandi_server/routers/workflow_utils.py @@ -1,8 +1,10 @@ from datetime import datetime from fastapi import HTTPException, status +from json import dumps from pathlib import Path from typing import List, Optional +from operandi_utils.constants import StateJob from operandi_utils.database import ( db_get_all_workflows_by_user, db_get_all_workflow_jobs_by_user, db_get_workflow, db_get_workflow_job, db_get_workspace @@ -10,6 +12,7 @@ from operandi_utils.database.models import DBWorkflow, DBWorkflowJob from operandi_utils.oton import OTONConverter, OCRDValidator from operandi_utils.oton.constants import PARAMS_KEY_METS_SOCKET_PATH +from operandi_utils.rabbitmq import RABBITMQ_QUEUE_JOB_STATUSES from operandi_server.models import WorkflowRsrc, WorkflowJobRsrc @@ -124,12 +127,30 @@ async def get_user_workflows( db_workflows = await db_get_all_workflows_by_user(user_id=user_id, start_date=start_date, end_date=end_date) return [WorkflowRsrc.from_db_workflow(db_workflow) for db_workflow in db_workflows] +async def push_status_request_to_rabbitmq(logger, rmq_publisher, job_id: str): + # Create the job status message to be sent to the RabbitMQ queue + try: + job_status_message = {"job_id": f"{job_id}"} + logger.debug(f"Encoding the job status RabbitMQ message: {job_status_message}") + encoded_wf_message = dumps(job_status_message).encode(encoding="utf-8") + logger.debug(f"Pushing to the RabbitMQ queue for job statuses: {RABBITMQ_QUEUE_JOB_STATUSES}") + rmq_publisher.publish_to_queue(queue_name=RABBITMQ_QUEUE_JOB_STATUSES, message=encoded_wf_message) + except Exception as error: + message = "Failed to push status request to RabbitMQ" + logger.error(f"{message}, error: {error}") + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=message) + async def get_user_workflow_jobs( user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None ) -> List[WorkflowJobRsrc]: db_workflow_jobs = await db_get_all_workflow_jobs_by_user(user_id=user_id, start_date=start_date, end_date=end_date) response = [] for db_workflow_job in db_workflow_jobs: + job_state = db_workflow_job.job_state + if job_state != StateJob.SUCCESS and job_state != StateJob.FAILED: + # TODO: Call here the 'push_status_request_to_rabbitmq' once + # that method is also refactored to be rmq_publisher independent + pass db_workflow = await db_get_workflow(db_workflow_job.workflow_id) db_workspace = await db_get_workspace(db_workflow_job.workspace_id) response.append(WorkflowJobRsrc.from_db_workflow_job(db_workflow_job, db_workflow, db_workspace)) From 90ec3676f6559dfb2b8238cae88b77fb1dc2721f Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:45:39 +0100 Subject: [PATCH 26/29] refactor: remove versioning of nf scripts from tests --- tests/assets/oton/test_output_nextflow1_apptainer.nf | 2 +- tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf | 2 +- tests/assets/oton/test_output_nextflow1_docker.nf | 2 +- tests/assets/oton/test_output_nextflow1_docker_with_MS.nf | 2 +- tests/assets/oton/test_output_nextflow1_local.nf | 2 +- tests/assets/oton/test_output_nextflow1_local_with_MS.nf | 2 +- tests/assets/oton/test_output_nextflow2.nf | 2 +- tests/assets/oton/test_output_nextflow3.nf | 2 +- tests/assets/oton/test_output_nextflow4.nf | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/assets/oton/test_output_nextflow1_apptainer.nf b/tests/assets/oton/test_output_nextflow1_apptainer.nf index 4e9c0699..0b63c78e 100644 --- a/tests/assets/oton/test_output_nextflow1_apptainer.nf +++ b/tests/assets/oton/test_output_nextflow1_apptainer.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf index ba9bf528..dbdf3455 100644 --- a/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_apptainer_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow1_docker.nf b/tests/assets/oton/test_output_nextflow1_docker.nf index fdf03c09..fd63a5d3 100644 --- a/tests/assets/oton/test_output_nextflow1_docker.nf +++ b/tests/assets/oton/test_output_nextflow1_docker.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf index 05e8966d..5fa87c01 100644 --- a/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_docker_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow1_local.nf b/tests/assets/oton/test_output_nextflow1_local.nf index ced217c9..541a93b5 100644 --- a/tests/assets/oton/test_output_nextflow1_local.nf +++ b/tests/assets/oton/test_output_nextflow1_local.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf index e11f4346..40803328 100644 --- a/tests/assets/oton/test_output_nextflow1_local_with_MS.nf +++ b/tests/assets/oton/test_output_nextflow1_local_with_MS.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow2.nf b/tests/assets/oton/test_output_nextflow2.nf index a2798bdf..7850c980 100644 --- a/tests/assets/oton/test_output_nextflow2.nf +++ b/tests/assets/oton/test_output_nextflow2.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" diff --git a/tests/assets/oton/test_output_nextflow3.nf b/tests/assets/oton/test_output_nextflow3.nf index 8e8df4ac..19ad338e 100644 --- a/tests/assets/oton/test_output_nextflow3.nf +++ b/tests/assets/oton/test_output_nextflow3.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-GT-SEG-BLOCK,OCR-D-OCR" diff --git a/tests/assets/oton/test_output_nextflow4.nf b/tests/assets/oton/test_output_nextflow4.nf index 7f5c313d..a72010de 100644 --- a/tests/assets/oton/test_output_nextflow4.nf +++ b/tests/assets/oton/test_output_nextflow4.nf @@ -1,4 +1,4 @@ -// This workflow was automatically generated by the v2.18.0 operandi_utils.oton module +// This workflow was automatically generated by the operandi_utils.oton module nextflow.enable.dsl = 2 params.input_file_group = "OCR-D-IMG" From 5857c4354213d6eb48657505c86e331eb2b01893 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:45:59 +0100 Subject: [PATCH 27/29] fix: job status states of checking data in batch --- src/server/operandi_server/routers/admin_panel.py | 11 +++++++++-- src/server/operandi_server/routers/user.py | 9 ++++++++- src/server/operandi_server/routers/workflow_utils.py | 6 ++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 49743596..61dae3b6 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -7,6 +7,7 @@ from operandi_server.models import PYUserInfo, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.constants import AccountType, ServerApiTag from operandi_utils.utils import send_bag_to_ola_hd +from operandi_utils.rabbitmq import get_connection_publisher from .user_utils import get_user_accounts, get_user_processing_stats_with_handling, user_auth_with_handling from .workflow_utils import get_user_workflows, get_user_workflow_jobs from .workspace_utils import ( @@ -15,7 +16,12 @@ class RouterAdminPanel: def __init__(self): - self.logger = getLogger("operandi_server.routers.user") + self.logger = getLogger("operandi_server.routers.admin_panel") + + self.logger.info(f"Trying to connect RMQ Publisher") + self.rmq_publisher = get_connection_publisher(enable_acks=True) + self.logger.info(f"RMQPublisher connected") + self.router = APIRouter(tags=[ServerApiTag.ADMIN]) self.router.add_api_route( path="/admin/users", @@ -95,7 +101,8 @@ async def user_workflow_jobs( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ await self.auth_admin_with_handling(auth) - return await get_user_workflow_jobs(user_id=user_id, start_date=start_date, end_date=end_date) + return await get_user_workflow_jobs( + self.logger, self.rmq_publisher, user_id, start_date, end_date) async def user_workspaces( self, user_id: str, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index 87cb93f9..e2b4a414 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -7,6 +7,7 @@ from operandi_utils.constants import AccountType, ServerApiTag from operandi_server.models import PYUserAction, WorkflowJobRsrc, WorkspaceRsrc, WorkflowRsrc from operandi_utils.database.models import DBProcessingStatistics +from operandi_utils.rabbitmq import get_connection_publisher from .workflow_utils import get_user_workflows, get_user_workflow_jobs from .workspace_utils import get_user_workspaces from .user_utils import get_user_processing_stats_with_handling, user_auth_with_handling, user_register_with_handling @@ -15,6 +16,11 @@ class RouterUser: def __init__(self): self.logger = getLogger("operandi_server.routers.user") + + self.logger.info(f"Trying to connect RMQ Publisher") + self.rmq_publisher = get_connection_publisher(enable_acks=True) + self.logger.info(f"RMQPublisher connected") + self.router = APIRouter(tags=[ServerApiTag.USER]) self.router.add_api_route( path="/user/register", @@ -97,7 +103,8 @@ async def user_workflow_jobs( The expected datetime format: YYYY-MM-DDTHH:MM:SS, for example, 2024-12-01T18:17:15 """ py_user_action = await user_auth_with_handling(self.logger, auth) - return await get_user_workflow_jobs(user_id=py_user_action.user_id, start_date=start_date, end_date=end_date) + return await get_user_workflow_jobs( + self.logger, self.rmq_publisher, py_user_action.user_id, start_date, end_date) async def user_workspaces( self, auth: HTTPBasicCredentials = Depends(HTTPBasic()), diff --git a/src/server/operandi_server/routers/workflow_utils.py b/src/server/operandi_server/routers/workflow_utils.py index 75cc81b5..9d04bc5e 100644 --- a/src/server/operandi_server/routers/workflow_utils.py +++ b/src/server/operandi_server/routers/workflow_utils.py @@ -141,16 +141,14 @@ async def push_status_request_to_rabbitmq(logger, rmq_publisher, job_id: str): raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=message) async def get_user_workflow_jobs( - user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None + logger, rmq_publisher, user_id: str, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None ) -> List[WorkflowJobRsrc]: db_workflow_jobs = await db_get_all_workflow_jobs_by_user(user_id=user_id, start_date=start_date, end_date=end_date) response = [] for db_workflow_job in db_workflow_jobs: job_state = db_workflow_job.job_state if job_state != StateJob.SUCCESS and job_state != StateJob.FAILED: - # TODO: Call here the 'push_status_request_to_rabbitmq' once - # that method is also refactored to be rmq_publisher independent - pass + await push_status_request_to_rabbitmq(logger, rmq_publisher, db_workflow_job.job_id) db_workflow = await db_get_workflow(db_workflow_job.workflow_id) db_workspace = await db_get_workspace(db_workflow_job.workspace_id) response.append(WorkflowJobRsrc.from_db_workflow_job(db_workflow_job, db_workflow, db_workspace)) From b9d0772c4b59cceca2fd597d1ae13c81e870ed8d Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:46:44 +0100 Subject: [PATCH 28/29] release: v2.18.3 --- src/utils/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/setup.py b/src/utils/setup.py index 858b9429..a896d42b 100644 --- a/src/utils/setup.py +++ b/src/utils/setup.py @@ -5,7 +5,7 @@ setup( name='operandi_utils', - version='2.18.2', + version='2.18.3', description='OPERANDI - Utils', long_description=open('README.md').read(), long_description_content_type='text/markdown', From b6b96de8189c9a77f75a90300e03d6b09accbf1a Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 6 Dec 2024 17:48:18 +0100 Subject: [PATCH 29/29] fix: disconnect rmq in destructor --- src/server/operandi_server/routers/admin_panel.py | 4 ++++ src/server/operandi_server/routers/user.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/server/operandi_server/routers/admin_panel.py b/src/server/operandi_server/routers/admin_panel.py index 61dae3b6..ae07de72 100644 --- a/src/server/operandi_server/routers/admin_panel.py +++ b/src/server/operandi_server/routers/admin_panel.py @@ -54,6 +54,10 @@ def __init__(self): summary="Push a workspace to Ola-HD service" ) + def __del__(self): + if self.rmq_publisher: + self.rmq_publisher.disconnect() + async def auth_admin_with_handling(self, auth: HTTPBasicCredentials): py_user_action = await user_auth_with_handling(self.logger, auth) if py_user_action.account_type != AccountType.ADMIN: diff --git a/src/server/operandi_server/routers/user.py b/src/server/operandi_server/routers/user.py index e2b4a414..12a518ee 100644 --- a/src/server/operandi_server/routers/user.py +++ b/src/server/operandi_server/routers/user.py @@ -59,6 +59,10 @@ def __init__(self): response_model=List, response_model_exclude_unset=True, response_model_exclude_none=True ) + def __del__(self): + if self.rmq_publisher: + self.rmq_publisher.disconnect() + async def user_login(self, auth: HTTPBasicCredentials = Depends(HTTPBasic())) -> PYUserAction: """ Used for user authentication.