crim-ca · fmigneault · Jul 7, 2021 · Jul 7, 2021 · Jul 7, 2021 · Jul 16, 2021
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -10,11 +10,19 @@ Changes
 
 Changes:
 --------
-- No change.
+- | Modify problematic output location and execution methodology of ``file2string_array`` process so it does what
+    it actually advertises in its ``abstract`` description and doesn't result in error after execution.
+  |
+  | This modification actually changes the internal operation accomplished by ``file2string_array`` process
+    since it was attempting to create directly a CWL output of type ``File[]``. This is not yet supported
+    in `Weaver` (see issue `#25 <https://github.com/crim-ca/weaver/issues/25>`_) because `OGC API - Processes`
+    does not allow output multiplicity under a same output ID.
 
 Fixes:
 ------
-- No change.
+- Fix invalid ``python`` reference location in ``file2string_array`` process CWL definition
+  (fixes `#275 <https://github.com/crim-ca/weaver/issues/275>`_).
+- Fix missing ``version`` field definition for ``file2string_array`` process and set it as ``1.0``.
 
 `3.3.0 <https://github.com/crim-ca/weaver/tree/3.3.0>`_ (2021-07-16)
 ========================================================================

diff --git a/tests/functional/test_builtin.py b/tests/functional/test_builtin.py
@@ -5,9 +5,16 @@
 from typing import TYPE_CHECKING
 
 import pytest
+import mock
 
 from tests.functional.utils import WpsPackageConfigBase
-from tests.utils import get_settings_from_testapp, mocked_execute_process, mocked_sub_requests
+from tests.utils import (
+    get_settings_from_testapp,
+    mocked_execute_process,
+    mocked_http_file,
+    mocked_reference_test_file,
+    mocked_sub_requests
+)
 from weaver.execute import EXECUTE_TRANSMISSION_MODE_REFERENCE
 from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF
 from weaver.processes.builtin import register_builtin_processes
@@ -117,3 +124,78 @@ def test_jsonarray2netcdf_execute(self):
         assert isinstance(nc_path, str) and len(nc_path)
         assert nc_path.startswith(wps_out)
         assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1]
+
+    def test_file2string_array_describe(self):
+        resp = self.app.get("/processes/file2string_array", headers=self.json_headers)
+        assert resp.status_code == 200
+        assert resp.content_type in CONTENT_TYPE_APP_JSON
+        assert resp.json["process"]["id"] == "file2string_array"
+        assert resp.json["process"]["abstract"] not in ["", None]
+        assert resp.json["process"]["executeEndpoint"] == "https://localhost/processes/file2string_array/jobs"
+        assert isinstance(resp.json["process"]["inputs"], list)
+        assert len(resp.json["process"]["inputs"]) == 1
+        assert resp.json["process"]["inputs"][0]["id"] == "input"
+        assert isinstance(resp.json["process"]["inputs"][0]["formats"], list)
+        assert len(resp.json["process"]["inputs"][0]["formats"]) == 1  # must exist for file, mime-type not important
+        assert isinstance(resp.json["process"]["outputs"], list)
+        assert len(resp.json["process"]["outputs"]) == 1
+        assert resp.json["process"]["outputs"][0]["id"] == "output"
+        assert isinstance(resp.json["process"]["outputs"][0]["formats"], list)
+        assert len(resp.json["process"]["outputs"][0]["formats"]) == 1
+        assert resp.json["process"]["outputs"][0]["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON  # important here
+
+    # FIXME: must simulate otherwise tmp-file gets fetched and copied to pywps dir, changing output content (wrong path)
+    @mock.patch("weaver.processes.wps_package.WpsPackage.must_fetch", return_value=False)
+    def test_file2string_array_execute(self, __mock):
+        fake_href = "https://random-server.com/fake-data.txt"
+        with contextlib.ExitStack() as stack_exec:
+            data = {
+                "mode": "async",
+                "response": "document",
+                "inputs": [{"id": "input", "href": fake_href}],
+                "outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}],
+            }
+
+            for mock_exec in mocked_execute_process():
+                stack_exec.enter_context(mock_exec)
+            path = "/processes/file2string_array/jobs"
+            resp = mocked_sub_requests(self.app, "post_json", path,
+                                       data=data, headers=self.json_headers, only_local=True)
+
+        assert resp.status_code == 201, "Error: {}".format(resp.json)
+        assert resp.content_type in CONTENT_TYPE_APP_JSON
+        job_url = resp.json["location"]
+        results = self.monitor_job(job_url)
+
+        # first validate format of OGC-API results
+        assert "output" in results, "Expected result ID 'output' in response body"
+        assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict"
+        assert "href" in results["output"]
+        assert "format" in results["output"]
+        fmt = results["output"]["format"]  # type: JSON
+        assert isinstance(fmt, dict), "Result format should be provided with content details"
+        assert "mediaType" in fmt
+        assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition"
+        assert fmt["mediaType"] == CONTENT_TYPE_APP_JSON, "Result 'output' format expected to be JSON file"
+        out_path = results["output"]["href"]
+        assert isinstance(out_path, str) and len(out_path)
+        settings = get_settings_from_testapp(self.app)
+        wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path"))
+        real_path = out_path.replace(wps_out, settings.get("weaver.wps_output_dir"))
+        assert out_path.startswith(wps_out)
+        assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
+        assert os.path.isfile(real_path)
+        with open(real_path, "r") as f:
+            out_data = json.load(f)
+        assert out_data == [fake_href]
+
+        # if everything was valid for results, validate equivalent but differently formatted outputs response
+        output_url = job_url + "/outputs"
+        resp = self.app.get(output_url, headers=self.json_headers)
+        assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json)
+        outputs = resp.json
+        assert outputs["outputs"][0]["id"] == "output"
+        out_path = outputs["outputs"][0]["href"]
+        assert isinstance(out_path, str) and len(out_path)
+        assert out_path.startswith(wps_out)
+        assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py
@@ -1028,19 +1028,19 @@ def tmp(value):
 
         assert desc["process"] is not None
 
-        test_bucket_ref = mocked_aws_s3_bucket_test_file(
+        test_bucket_ref, _ = mocked_aws_s3_bucket_test_file(
             "wps-process-test-bucket",
             "input_file_s3.txt",
             "This is a generated file for s3 test"
         )
 
-        test_http_ref = mocked_reference_test_file(
+        test_http_ref, _ = mocked_reference_test_file(
             "input_file_http.txt",
             "http",
             "This is a generated file for http test"
         )
 
-        test_file_ref = mocked_reference_test_file(
+        test_file_ref, _ = mocked_reference_test_file(
             "input_file_ref.txt",
             "file",
             "This is a generated file for file test"
@@ -1596,7 +1596,7 @@ def test_execute_application_package_process_with_bucket(self):
         input_file_s3 = "input-s3.txt"
         input_file_http = "media-types.txt"  # use some random HTTP location that actually exists (will be fetched)
         test_http_ref = "https://www.iana.org/assignments/media-types/{}".format(input_file_http)
-        test_bucket_ref = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3)
+        test_bucket_ref, _ = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3)
         exec_body = {
             "mode": EXECUTE_MODE_ASYNC,
             "response": EXECUTE_RESPONSE_DOCUMENT,

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -552,7 +552,7 @@ def test_fetch_file_remote_s3_bucket():
         test_file_name = "test-file.txt"
         test_file_data = "dummy file"
         test_bucket_name = "test-fake-bucket"
-        test_bucket_ref = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data)
+        test_bucket_ref, _ = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data)
         result = fetch_file(test_bucket_ref, tmpdir)
         assert result == os.path.join(tmpdir, test_file_name)
         assert os.path.isfile(result)

diff --git a/tests/utils.py b/tests/utils.py
@@ -34,7 +34,7 @@
 from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning
 
 if TYPE_CHECKING:
-    from typing import Any, Callable, Iterable, List, Optional, Type, Union
+    from typing import Any, Callable, Iterable, List, Optional, Tuple, Type, Union
 
     import botocore.client  # noqa
     from owslib.wps import Process as ProcessOWSWPS
@@ -526,7 +526,7 @@ def wrapped(*args, **kwargs):
 
 
 def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test file inside test S3 bucket"):
-    # type: (str,str, str) -> str
+    # type: (str,str, str) -> Tuple[str, str]
     """
     Generates a test file reference from dummy data that will be uploaded to the specified S3 bucket name using the
     provided file key.
@@ -548,17 +548,24 @@ def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test fi
     with tempfile.NamedTemporaryFile(mode="w") as tmp_file:
         tmp_file.write(file_content)
         tmp_file.flush()
-        s3.upload_file(Bucket=bucket_name, Filename=tmp_file.name, Key=file_name)
-    return "s3://{}/{}".format(bucket_name, file_name)
+        tmp_path = tmp_file.name
+        s3.upload_file(Bucket=bucket_name, Filename=tmp_path, Key=file_name)
+    tmp_href = "s3://{}/{}".format(bucket_name, file_name)
+    return tmp_href, tmp_path
 
 
 def mocked_http_file(test_func):
     # type: (Callable[[...], Any]) -> Callable
     """
     Creates a mock of the function :func:`fetch_file`, to fetch a generated file locally, for test purposes only.
+
     For instance, calling this function with :func:`mocked_http_file` decorator
     will effectively employ the mocked :func:`fetch_file` and return a generated local file.
 
+    Only file references that employ :py:data:`MOCK_HTTP_REF` as prefix will be mocked during file fetching step.
+    These kind of references can be obtained using the utility function :func:`mocked_reference_test_file`.
+    Other references will employ usual fetching methodology.
+
     .. seealso::
         - :func:`mocked_reference_test_file`
     """
@@ -574,16 +581,26 @@ def wrapped(*args, **kwargs):
     return wrapped
 
 
-def mocked_reference_test_file(file_name, href_type, file_content="This is a generated file for href test"):
-    # type: (str,str,str) -> str
+def mocked_reference_test_file(file_name, href_type, file_content="test data", stack_context=None):
+    # type: (str, str, str, Optional[contextlib.ExitStack]) -> Tuple[str, str]
     """
     Generates a test file reference from dummy data for http and file href types.
 
+    :param file_name: name of the temporary file to generate.
+    :param href_type: schema type of href (file, http, etc.)
+    :param file_content: contents of the temporary file.
+    :param stack_context: active context for auto-cleanup of temporary directly when closed.
+    :return: tuple of the generated reference and real temporary file path as (href, path)
+
     .. seealso::
         - :func:`mocked_http_file`
     """
-    tmpdir = tempfile.mkdtemp()
-    path = os.path.join(tmpdir, file_name)
+    if isinstance(stack_context, contextlib.ExitStack):
+        tmpdir = stack_context.enter_context(tempfile.TemporaryDirectory())
+    else:
+        tmpdir = tempfile.mkdtemp()
+    path = os.path.abspath(os.path.join(tmpdir, file_name))
     with open(path, "w") as tmp_file:
         tmp_file.write(file_content)
-    return "file://{}".format(path) if href_type == "file" else os.path.join(MOCK_HTTP_REF, path)
+    href = "file://{}".format(path) if href_type == "file" else (MOCK_HTTP_REF + path)
+    return href, path
diff --git a/weaver/processes/builtin/file2string_array.cwl b/weaver/processes/builtin/file2string_array.cwl
@@ -1,8 +1,7 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
-baseCommand: ${WEAVER_ROOT_DIR}/bin/python
+baseCommand: python
 arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)]
 inputs:
   input:
@@ -15,5 +14,7 @@ outputs:
   output:
     type: File
     format: iana:application/json
+    outputBinding:
+      glob: "output.json"
 $namespaces:
   iana: "https://www.iana.org/assignments/media-types/"
diff --git a/weaver/processes/builtin/file2string_array.py b/weaver/processes/builtin/file2string_array.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
-Transforms a file input into JSON file containing an array of file references as value.
+Transforms a file input into JSON file containing a string array formed of the single input file reference as value.
 """
 import argparse
 import json
@@ -17,27 +17,23 @@
 LOGGER.addHandler(logging.StreamHandler(sys.stdout))
 LOGGER.setLevel(logging.INFO)
 
-OUTPUT_CWL_JSON = "cwl.output.json"
+# process details
+__version__ = "1.0"
+__title__ = "File to string array"
+__abstract__ = __doc__  # NOTE: '__doc__' is fetched directly, this is mostly to be informative
 
 
 def main(input_file, output_dir):
     # type: (argparse.FileType, str) -> None
-    LOGGER.info(
-        "Got arguments: input_file=%s output_dir=%s", input_file, output_dir
-    )
-    output_data = {"output": [input_file]}
-    json.dump(output_data, open(os.path.join(output_dir, OUTPUT_CWL_JSON), "w"))
+    LOGGER.info("Got arguments: input_file=%s output_dir=%s", input_file, output_dir)
+    output_data = [input_file]
+    json.dump(output_data, open(os.path.join(output_dir, "output.json"), "w"))
 
 
 if __name__ == "__main__":
     LOGGER.info("Parsing inputs of '%s' process.", PACKAGE_NAME)
     PARSER = argparse.ArgumentParser(description=__doc__)
-    PARSER.add_argument("-i", help="CWL File")
-    PARSER.add_argument(
-        "-o",
-        metavar="outdir",
-        required=True,
-        help="Output directory of the retrieved NetCDF files extracted by name from the JSON file.",
-    )
+    PARSER.add_argument("-i", required=True, help="Input file reference.")
+    PARSER.add_argument("-o", required=True, help="Output directory where to generate the JSON file with input file.")
     ARGS = PARSER.parse_args()
     sys.exit(main(ARGS.i, ARGS.o))
diff --git a/weaver/processes/builtin/jsonarray2netcdf.cwl b/weaver/processes/builtin/jsonarray2netcdf.cwl
@@ -1,7 +1,6 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
 baseCommand: python
 arguments:
   - "${WEAVER_ROOT_DIR}/weaver/processes/builtin/jsonarray2netcdf.py"

diff --git a/weaver/processes/builtin/metalink2netcdf.cwl b/weaver/processes/builtin/metalink2netcdf.cwl
@@ -1,7 +1,6 @@
 #!/usr/bin/env cwl-runner
 cwlVersion: v1.0
 class: CommandLineTool
-# target the installed python pointing to weaver conda env to allow imports
 baseCommand: python
 arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/metalink2netcdf.py", "-o", $(runtime.outdir)]
 inputs:

diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py
@@ -720,7 +720,7 @@ def setup_loggers(self, log_stdout_stderr=True):
         log_file_handler.setFormatter(log_file_formatter)
 
         # prepare package logger
-        self.logger = logging.getLogger("{}.{}".format(LOGGER.name, self.package_id))
+        self.logger = logging.getLogger("{}|{}".format(LOGGER.name, self.package_id))
         self.logger.addHandler(log_file_handler)
         self.logger.setLevel(self.log_level)