diff --git a/CHANGES.rst b/CHANGES.rst index f76432bd3..b4a58cce5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,11 +10,19 @@ Changes Changes: -------- -- No change. +- | Modify problematic output location and execution methodology of ``file2string_array`` process so it does what + it actually advertises in its ``abstract`` description and doesn't result in error after execution. + | + | This modification actually changes the internal operation accomplished by ``file2string_array`` process + since it was attempting to create directly a CWL output of type ``File[]``. This is not yet supported + in `Weaver` (see issue `#25 `_) because `OGC API - Processes` + does not allow output multiplicity under a same output ID. Fixes: ------ -- No change. +- Fix invalid ``python`` reference location in ``file2string_array`` process CWL definition + (fixes `#275 `_). +- Fix missing ``version`` field definition for ``file2string_array`` process and set it as ``1.0``. `3.3.0 `_ (2021-07-16) ======================================================================== diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py index d2f782e8c..d85261f81 100644 --- a/tests/functional/test_builtin.py +++ b/tests/functional/test_builtin.py @@ -5,9 +5,16 @@ from typing import TYPE_CHECKING import pytest +import mock from tests.functional.utils import WpsPackageConfigBase -from tests.utils import get_settings_from_testapp, mocked_execute_process, mocked_sub_requests +from tests.utils import ( + get_settings_from_testapp, + mocked_execute_process, + mocked_http_file, + mocked_reference_test_file, + mocked_sub_requests +) from weaver.execute import EXECUTE_TRANSMISSION_MODE_REFERENCE from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF from weaver.processes.builtin import register_builtin_processes @@ -117,3 +124,78 @@ def test_jsonarray2netcdf_execute(self): assert isinstance(nc_path, str) and len(nc_path) assert nc_path.startswith(wps_out) assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1] + + def test_file2string_array_describe(self): + resp = self.app.get("/processes/file2string_array", headers=self.json_headers) + assert resp.status_code == 200 + assert resp.content_type in CONTENT_TYPE_APP_JSON + assert resp.json["process"]["id"] == "file2string_array" + assert resp.json["process"]["abstract"] not in ["", None] + assert resp.json["process"]["executeEndpoint"] == "https://localhost/processes/file2string_array/jobs" + assert isinstance(resp.json["process"]["inputs"], list) + assert len(resp.json["process"]["inputs"]) == 1 + assert resp.json["process"]["inputs"][0]["id"] == "input" + assert isinstance(resp.json["process"]["inputs"][0]["formats"], list) + assert len(resp.json["process"]["inputs"][0]["formats"]) == 1 # must exist for file, mime-type not important + assert isinstance(resp.json["process"]["outputs"], list) + assert len(resp.json["process"]["outputs"]) == 1 + assert resp.json["process"]["outputs"][0]["id"] == "output" + assert isinstance(resp.json["process"]["outputs"][0]["formats"], list) + assert len(resp.json["process"]["outputs"][0]["formats"]) == 1 + assert resp.json["process"]["outputs"][0]["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON # important here + + # FIXME: must simulate otherwise tmp-file gets fetched and copied to pywps dir, changing output content (wrong path) + @mock.patch("weaver.processes.wps_package.WpsPackage.must_fetch", return_value=False) + def test_file2string_array_execute(self, __mock): + fake_href = "https://random-server.com/fake-data.txt" + with contextlib.ExitStack() as stack_exec: + data = { + "mode": "async", + "response": "document", + "inputs": [{"id": "input", "href": fake_href}], + "outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}], + } + + for mock_exec in mocked_execute_process(): + stack_exec.enter_context(mock_exec) + path = "/processes/file2string_array/jobs" + resp = mocked_sub_requests(self.app, "post_json", path, + data=data, headers=self.json_headers, only_local=True) + + assert resp.status_code == 201, "Error: {}".format(resp.json) + assert resp.content_type in CONTENT_TYPE_APP_JSON + job_url = resp.json["location"] + results = self.monitor_job(job_url) + + # first validate format of OGC-API results + assert "output" in results, "Expected result ID 'output' in response body" + assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict" + assert "href" in results["output"] + assert "format" in results["output"] + fmt = results["output"]["format"] # type: JSON + assert isinstance(fmt, dict), "Result format should be provided with content details" + assert "mediaType" in fmt + assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition" + assert fmt["mediaType"] == CONTENT_TYPE_APP_JSON, "Result 'output' format expected to be JSON file" + out_path = results["output"]["href"] + assert isinstance(out_path, str) and len(out_path) + settings = get_settings_from_testapp(self.app) + wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path")) + real_path = out_path.replace(wps_out, settings.get("weaver.wps_output_dir")) + assert out_path.startswith(wps_out) + assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1] + assert os.path.isfile(real_path) + with open(real_path, "r") as f: + out_data = json.load(f) + assert out_data == [fake_href] + + # if everything was valid for results, validate equivalent but differently formatted outputs response + output_url = job_url + "/outputs" + resp = self.app.get(output_url, headers=self.json_headers) + assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json) + outputs = resp.json + assert outputs["outputs"][0]["id"] == "output" + out_path = outputs["outputs"][0]["href"] + assert isinstance(out_path, str) and len(out_path) + assert out_path.startswith(wps_out) + assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1] diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 1ad61fd98..c88885c04 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -1028,19 +1028,19 @@ def tmp(value): assert desc["process"] is not None - test_bucket_ref = mocked_aws_s3_bucket_test_file( + test_bucket_ref, _ = mocked_aws_s3_bucket_test_file( "wps-process-test-bucket", "input_file_s3.txt", "This is a generated file for s3 test" ) - test_http_ref = mocked_reference_test_file( + test_http_ref, _ = mocked_reference_test_file( "input_file_http.txt", "http", "This is a generated file for http test" ) - test_file_ref = mocked_reference_test_file( + test_file_ref, _ = mocked_reference_test_file( "input_file_ref.txt", "file", "This is a generated file for file test" @@ -1596,7 +1596,7 @@ def test_execute_application_package_process_with_bucket(self): input_file_s3 = "input-s3.txt" input_file_http = "media-types.txt" # use some random HTTP location that actually exists (will be fetched) test_http_ref = "https://www.iana.org/assignments/media-types/{}".format(input_file_http) - test_bucket_ref = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3) + test_bucket_ref, _ = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3) exec_body = { "mode": EXECUTE_MODE_ASYNC, "response": EXECUTE_RESPONSE_DOCUMENT, diff --git a/tests/test_utils.py b/tests/test_utils.py index 631602ef2..c947dd200 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -552,7 +552,7 @@ def test_fetch_file_remote_s3_bucket(): test_file_name = "test-file.txt" test_file_data = "dummy file" test_bucket_name = "test-fake-bucket" - test_bucket_ref = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data) + test_bucket_ref, _ = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data) result = fetch_file(test_bucket_ref, tmpdir) assert result == os.path.join(tmpdir, test_file_name) assert os.path.isfile(result) diff --git a/tests/utils.py b/tests/utils.py index 3553362fc..1961da257 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -34,7 +34,7 @@ from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning if TYPE_CHECKING: - from typing import Any, Callable, Iterable, List, Optional, Type, Union + from typing import Any, Callable, Iterable, List, Optional, Tuple, Type, Union import botocore.client # noqa from owslib.wps import Process as ProcessOWSWPS @@ -526,7 +526,7 @@ def wrapped(*args, **kwargs): def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test file inside test S3 bucket"): - # type: (str,str, str) -> str + # type: (str,str, str) -> Tuple[str, str] """ Generates a test file reference from dummy data that will be uploaded to the specified S3 bucket name using the provided file key. @@ -548,17 +548,24 @@ def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test fi with tempfile.NamedTemporaryFile(mode="w") as tmp_file: tmp_file.write(file_content) tmp_file.flush() - s3.upload_file(Bucket=bucket_name, Filename=tmp_file.name, Key=file_name) - return "s3://{}/{}".format(bucket_name, file_name) + tmp_path = tmp_file.name + s3.upload_file(Bucket=bucket_name, Filename=tmp_path, Key=file_name) + tmp_href = "s3://{}/{}".format(bucket_name, file_name) + return tmp_href, tmp_path def mocked_http_file(test_func): # type: (Callable[[...], Any]) -> Callable """ Creates a mock of the function :func:`fetch_file`, to fetch a generated file locally, for test purposes only. + For instance, calling this function with :func:`mocked_http_file` decorator will effectively employ the mocked :func:`fetch_file` and return a generated local file. + Only file references that employ :py:data:`MOCK_HTTP_REF` as prefix will be mocked during file fetching step. + These kind of references can be obtained using the utility function :func:`mocked_reference_test_file`. + Other references will employ usual fetching methodology. + .. seealso:: - :func:`mocked_reference_test_file` """ @@ -574,16 +581,26 @@ def wrapped(*args, **kwargs): return wrapped -def mocked_reference_test_file(file_name, href_type, file_content="This is a generated file for href test"): - # type: (str,str,str) -> str +def mocked_reference_test_file(file_name, href_type, file_content="test data", stack_context=None): + # type: (str, str, str, Optional[contextlib.ExitStack]) -> Tuple[str, str] """ Generates a test file reference from dummy data for http and file href types. + :param file_name: name of the temporary file to generate. + :param href_type: schema type of href (file, http, etc.) + :param file_content: contents of the temporary file. + :param stack_context: active context for auto-cleanup of temporary directly when closed. + :return: tuple of the generated reference and real temporary file path as (href, path) + .. seealso:: - :func:`mocked_http_file` """ - tmpdir = tempfile.mkdtemp() - path = os.path.join(tmpdir, file_name) + if isinstance(stack_context, contextlib.ExitStack): + tmpdir = stack_context.enter_context(tempfile.TemporaryDirectory()) + else: + tmpdir = tempfile.mkdtemp() + path = os.path.abspath(os.path.join(tmpdir, file_name)) with open(path, "w") as tmp_file: tmp_file.write(file_content) - return "file://{}".format(path) if href_type == "file" else os.path.join(MOCK_HTTP_REF, path) + href = "file://{}".format(path) if href_type == "file" else (MOCK_HTTP_REF + path) + return href, path diff --git a/weaver/processes/builtin/file2string_array.cwl b/weaver/processes/builtin/file2string_array.cwl index 65a238365..11f7d3f5c 100644 --- a/weaver/processes/builtin/file2string_array.cwl +++ b/weaver/processes/builtin/file2string_array.cwl @@ -1,8 +1,7 @@ #!/usr/bin/env cwl-runner cwlVersion: v1.0 class: CommandLineTool -# target the installed python pointing to weaver conda env to allow imports -baseCommand: ${WEAVER_ROOT_DIR}/bin/python +baseCommand: python arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)] inputs: input: @@ -15,5 +14,7 @@ outputs: output: type: File format: iana:application/json + outputBinding: + glob: "output.json" $namespaces: iana: "https://www.iana.org/assignments/media-types/" diff --git a/weaver/processes/builtin/file2string_array.py b/weaver/processes/builtin/file2string_array.py index 280c11451..0a3ceec62 100644 --- a/weaver/processes/builtin/file2string_array.py +++ b/weaver/processes/builtin/file2string_array.py @@ -1,6 +1,6 @@ #!/usr/bin/env python """ -Transforms a file input into JSON file containing an array of file references as value. +Transforms a file input into JSON file containing a string array formed of the single input file reference as value. """ import argparse import json @@ -17,27 +17,23 @@ LOGGER.addHandler(logging.StreamHandler(sys.stdout)) LOGGER.setLevel(logging.INFO) -OUTPUT_CWL_JSON = "cwl.output.json" +# process details +__version__ = "1.0" +__title__ = "File to string array" +__abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative def main(input_file, output_dir): # type: (argparse.FileType, str) -> None - LOGGER.info( - "Got arguments: input_file=%s output_dir=%s", input_file, output_dir - ) - output_data = {"output": [input_file]} - json.dump(output_data, open(os.path.join(output_dir, OUTPUT_CWL_JSON), "w")) + LOGGER.info("Got arguments: input_file=%s output_dir=%s", input_file, output_dir) + output_data = [input_file] + json.dump(output_data, open(os.path.join(output_dir, "output.json"), "w")) if __name__ == "__main__": LOGGER.info("Parsing inputs of '%s' process.", PACKAGE_NAME) PARSER = argparse.ArgumentParser(description=__doc__) - PARSER.add_argument("-i", help="CWL File") - PARSER.add_argument( - "-o", - metavar="outdir", - required=True, - help="Output directory of the retrieved NetCDF files extracted by name from the JSON file.", - ) + PARSER.add_argument("-i", required=True, help="Input file reference.") + PARSER.add_argument("-o", required=True, help="Output directory where to generate the JSON file with input file.") ARGS = PARSER.parse_args() sys.exit(main(ARGS.i, ARGS.o)) diff --git a/weaver/processes/builtin/jsonarray2netcdf.cwl b/weaver/processes/builtin/jsonarray2netcdf.cwl index a4ad3308f..f4ee80126 100644 --- a/weaver/processes/builtin/jsonarray2netcdf.cwl +++ b/weaver/processes/builtin/jsonarray2netcdf.cwl @@ -1,7 +1,6 @@ #!/usr/bin/env cwl-runner cwlVersion: v1.0 class: CommandLineTool -# target the installed python pointing to weaver conda env to allow imports baseCommand: python arguments: - "${WEAVER_ROOT_DIR}/weaver/processes/builtin/jsonarray2netcdf.py" diff --git a/weaver/processes/builtin/metalink2netcdf.cwl b/weaver/processes/builtin/metalink2netcdf.cwl index e962ff71d..b8a492922 100644 --- a/weaver/processes/builtin/metalink2netcdf.cwl +++ b/weaver/processes/builtin/metalink2netcdf.cwl @@ -1,7 +1,6 @@ #!/usr/bin/env cwl-runner cwlVersion: v1.0 class: CommandLineTool -# target the installed python pointing to weaver conda env to allow imports baseCommand: python arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/metalink2netcdf.py", "-o", $(runtime.outdir)] inputs: diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index d645d2435..6a800d8a1 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -720,7 +720,7 @@ def setup_loggers(self, log_stdout_stderr=True): log_file_handler.setFormatter(log_file_formatter) # prepare package logger - self.logger = logging.getLogger("{}.{}".format(LOGGER.name, self.package_id)) + self.logger = logging.getLogger("{}|{}".format(LOGGER.name, self.package_id)) self.logger.addHandler(log_file_handler) self.logger.setLevel(self.log_level)