Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix python file2stringarray #278

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,19 @@ Changes

Changes:
--------
- No change.
- | Modify problematic output location and execution methodology of ``file2string_array`` process so it does what
dbyrns marked this conversation as resolved.
Show resolved Hide resolved
it actually advertises in its ``abstract`` description and doesn't result in error after execution.
|
| This modification actually changes the internal operation accomplished by ``file2string_array`` process
since it was attempting to create directly a CWL output of type ``File[]``. This is not yet supported
in `Weaver` (see issue `#25 <https://github.com/crim-ca/weaver/issues/25>`_) because `OGC API - Processes`
does not allow output multiplicity under a same output ID.

Fixes:
------
- No change.
- Fix invalid ``python`` reference location in ``file2string_array`` process CWL definition
(fixes `#275 <https://github.com/crim-ca/weaver/issues/275>`_).
- Fix missing ``version`` field definition for ``file2string_array`` process and set it as ``1.0``.

`3.3.0 <https://github.com/crim-ca/weaver/tree/3.3.0>`_ (2021-07-16)
========================================================================
Expand Down
84 changes: 83 additions & 1 deletion tests/functional/test_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,16 @@
from typing import TYPE_CHECKING

import pytest
import mock

from tests.functional.utils import WpsPackageConfigBase
from tests.utils import get_settings_from_testapp, mocked_execute_process, mocked_sub_requests
from tests.utils import (
get_settings_from_testapp,
mocked_execute_process,
mocked_http_file,
mocked_reference_test_file,
mocked_sub_requests
)
from weaver.execute import EXECUTE_TRANSMISSION_MODE_REFERENCE
from weaver.formats import CONTENT_TYPE_APP_JSON, CONTENT_TYPE_APP_NETCDF
from weaver.processes.builtin import register_builtin_processes
Expand Down Expand Up @@ -117,3 +124,78 @@ def test_jsonarray2netcdf_execute(self):
assert isinstance(nc_path, str) and len(nc_path)
assert nc_path.startswith(wps_out)
assert os.path.split(nc_real_path)[-1] == os.path.split(nc_path)[-1]

def test_file2string_array_describe(self):
resp = self.app.get("/processes/file2string_array", headers=self.json_headers)
assert resp.status_code == 200
assert resp.content_type in CONTENT_TYPE_APP_JSON
assert resp.json["process"]["id"] == "file2string_array"
dbyrns marked this conversation as resolved.
Show resolved Hide resolved
assert resp.json["process"]["abstract"] not in ["", None]
assert resp.json["process"]["executeEndpoint"] == "https://localhost/processes/file2string_array/jobs"
assert isinstance(resp.json["process"]["inputs"], list)
assert len(resp.json["process"]["inputs"]) == 1
assert resp.json["process"]["inputs"][0]["id"] == "input"
assert isinstance(resp.json["process"]["inputs"][0]["formats"], list)
assert len(resp.json["process"]["inputs"][0]["formats"]) == 1 # must exist for file, mime-type not important
assert isinstance(resp.json["process"]["outputs"], list)
assert len(resp.json["process"]["outputs"]) == 1
assert resp.json["process"]["outputs"][0]["id"] == "output"
assert isinstance(resp.json["process"]["outputs"][0]["formats"], list)
assert len(resp.json["process"]["outputs"][0]["formats"]) == 1
assert resp.json["process"]["outputs"][0]["formats"][0]["mimeType"] == CONTENT_TYPE_APP_JSON # important here

# FIXME: must simulate otherwise tmp-file gets fetched and copied to pywps dir, changing output content (wrong path)
@mock.patch("weaver.processes.wps_package.WpsPackage.must_fetch", return_value=False)
def test_file2string_array_execute(self, __mock):
fake_href = "https://random-server.com/fake-data.txt"
with contextlib.ExitStack() as stack_exec:
data = {
"mode": "async",
"response": "document",
"inputs": [{"id": "input", "href": fake_href}],
"outputs": [{"id": "output", "transmissionMode": EXECUTE_TRANSMISSION_MODE_REFERENCE}],
}

for mock_exec in mocked_execute_process():
stack_exec.enter_context(mock_exec)
path = "/processes/file2string_array/jobs"
resp = mocked_sub_requests(self.app, "post_json", path,
data=data, headers=self.json_headers, only_local=True)

assert resp.status_code == 201, "Error: {}".format(resp.json)
assert resp.content_type in CONTENT_TYPE_APP_JSON
job_url = resp.json["location"]
results = self.monitor_job(job_url)

# first validate format of OGC-API results
assert "output" in results, "Expected result ID 'output' in response body"
assert isinstance(results["output"], dict), "Container of result ID 'output' should be a dict"
assert "href" in results["output"]
assert "format" in results["output"]
fmt = results["output"]["format"] # type: JSON
assert isinstance(fmt, dict), "Result format should be provided with content details"
assert "mediaType" in fmt
assert isinstance(fmt["mediaType"], str), "Result format Content-Type should be a single string definition"
assert fmt["mediaType"] == CONTENT_TYPE_APP_JSON, "Result 'output' format expected to be JSON file"
out_path = results["output"]["href"]
assert isinstance(out_path, str) and len(out_path)
settings = get_settings_from_testapp(self.app)
wps_out = "{}{}".format(settings.get("weaver.url"), settings.get("weaver.wps_output_path"))
real_path = out_path.replace(wps_out, settings.get("weaver.wps_output_dir"))
assert out_path.startswith(wps_out)
assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
assert os.path.isfile(real_path)
with open(real_path, "r") as f:
out_data = json.load(f)
assert out_data == [fake_href]

# if everything was valid for results, validate equivalent but differently formatted outputs response
output_url = job_url + "/outputs"
resp = self.app.get(output_url, headers=self.json_headers)
assert resp.status_code == 200, "Error job outputs:\n{}".format(resp.json)
outputs = resp.json
assert outputs["outputs"][0]["id"] == "output"
out_path = outputs["outputs"][0]["href"]
assert isinstance(out_path, str) and len(out_path)
assert out_path.startswith(wps_out)
assert os.path.split(real_path)[-1] == os.path.split(out_path)[-1]
8 changes: 4 additions & 4 deletions tests/functional/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,19 +1028,19 @@ def tmp(value):

assert desc["process"] is not None

test_bucket_ref = mocked_aws_s3_bucket_test_file(
test_bucket_ref, _ = mocked_aws_s3_bucket_test_file(
"wps-process-test-bucket",
"input_file_s3.txt",
"This is a generated file for s3 test"
)

test_http_ref = mocked_reference_test_file(
test_http_ref, _ = mocked_reference_test_file(
"input_file_http.txt",
"http",
"This is a generated file for http test"
)

test_file_ref = mocked_reference_test_file(
test_file_ref, _ = mocked_reference_test_file(
"input_file_ref.txt",
"file",
"This is a generated file for file test"
Expand Down Expand Up @@ -1596,7 +1596,7 @@ def test_execute_application_package_process_with_bucket(self):
input_file_s3 = "input-s3.txt"
input_file_http = "media-types.txt" # use some random HTTP location that actually exists (will be fetched)
test_http_ref = "https://www.iana.org/assignments/media-types/{}".format(input_file_http)
test_bucket_ref = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3)
test_bucket_ref, _ = mocked_aws_s3_bucket_test_file("wps-process-test-bucket", input_file_s3)
exec_body = {
"mode": EXECUTE_MODE_ASYNC,
"response": EXECUTE_RESPONSE_DOCUMENT,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,7 @@ def test_fetch_file_remote_s3_bucket():
test_file_name = "test-file.txt"
test_file_data = "dummy file"
test_bucket_name = "test-fake-bucket"
test_bucket_ref = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data)
test_bucket_ref, _ = mocked_aws_s3_bucket_test_file(test_bucket_name, test_file_name, test_file_data)
result = fetch_file(test_bucket_ref, tmpdir)
assert result == os.path.join(tmpdir, test_file_name)
assert os.path.isfile(result)
Expand Down
35 changes: 26 additions & 9 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from weaver.warning import MissingParameterWarning, UnsupportedOperationWarning

if TYPE_CHECKING:
from typing import Any, Callable, Iterable, List, Optional, Type, Union
from typing import Any, Callable, Iterable, List, Optional, Tuple, Type, Union

import botocore.client # noqa
from owslib.wps import Process as ProcessOWSWPS
Expand Down Expand Up @@ -526,7 +526,7 @@ def wrapped(*args, **kwargs):


def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test file inside test S3 bucket"):
# type: (str,str, str) -> str
# type: (str,str, str) -> Tuple[str, str]
"""
Generates a test file reference from dummy data that will be uploaded to the specified S3 bucket name using the
provided file key.
Expand All @@ -548,17 +548,24 @@ def mocked_aws_s3_bucket_test_file(bucket_name, file_name, file_content="Test fi
with tempfile.NamedTemporaryFile(mode="w") as tmp_file:
tmp_file.write(file_content)
tmp_file.flush()
s3.upload_file(Bucket=bucket_name, Filename=tmp_file.name, Key=file_name)
return "s3://{}/{}".format(bucket_name, file_name)
tmp_path = tmp_file.name
s3.upload_file(Bucket=bucket_name, Filename=tmp_path, Key=file_name)
tmp_href = "s3://{}/{}".format(bucket_name, file_name)
return tmp_href, tmp_path


def mocked_http_file(test_func):
# type: (Callable[[...], Any]) -> Callable
"""
Creates a mock of the function :func:`fetch_file`, to fetch a generated file locally, for test purposes only.

For instance, calling this function with :func:`mocked_http_file` decorator
will effectively employ the mocked :func:`fetch_file` and return a generated local file.

Only file references that employ :py:data:`MOCK_HTTP_REF` as prefix will be mocked during file fetching step.
These kind of references can be obtained using the utility function :func:`mocked_reference_test_file`.
Other references will employ usual fetching methodology.

.. seealso::
- :func:`mocked_reference_test_file`
"""
Expand All @@ -574,16 +581,26 @@ def wrapped(*args, **kwargs):
return wrapped


def mocked_reference_test_file(file_name, href_type, file_content="This is a generated file for href test"):
# type: (str,str,str) -> str
def mocked_reference_test_file(file_name, href_type, file_content="test data", stack_context=None):
# type: (str, str, str, Optional[contextlib.ExitStack]) -> Tuple[str, str]
"""
Generates a test file reference from dummy data for http and file href types.

:param file_name: name of the temporary file to generate.
:param href_type: schema type of href (file, http, etc.)
:param file_content: contents of the temporary file.
:param stack_context: active context for auto-cleanup of temporary directly when closed.
:return: tuple of the generated reference and real temporary file path as (href, path)

.. seealso::
- :func:`mocked_http_file`
"""
tmpdir = tempfile.mkdtemp()
path = os.path.join(tmpdir, file_name)
if isinstance(stack_context, contextlib.ExitStack):
tmpdir = stack_context.enter_context(tempfile.TemporaryDirectory())
else:
tmpdir = tempfile.mkdtemp()
path = os.path.abspath(os.path.join(tmpdir, file_name))
with open(path, "w") as tmp_file:
tmp_file.write(file_content)
return "file://{}".format(path) if href_type == "file" else os.path.join(MOCK_HTTP_REF, path)
href = "file://{}".format(path) if href_type == "file" else (MOCK_HTTP_REF + path)
return href, path
5 changes: 3 additions & 2 deletions weaver/processes/builtin/file2string_array.cwl
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: ${WEAVER_ROOT_DIR}/bin/python
baseCommand: python
arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/file2string_array.py", "-o", $(runtime.outdir)]
inputs:
input:
Expand All @@ -15,5 +14,7 @@ outputs:
output:
type: File
format: iana:application/json
outputBinding:
glob: "output.json"
$namespaces:
iana: "https://www.iana.org/assignments/media-types/"
24 changes: 10 additions & 14 deletions weaver/processes/builtin/file2string_array.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python
"""
Transforms a file input into JSON file containing an array of file references as value.
Transforms a file input into JSON file containing a string array formed of the single input file reference as value.
"""
import argparse
import json
Expand All @@ -17,27 +17,23 @@
LOGGER.addHandler(logging.StreamHandler(sys.stdout))
LOGGER.setLevel(logging.INFO)

OUTPUT_CWL_JSON = "cwl.output.json"
# process details
__version__ = "1.0"
__title__ = "File to string array"
__abstract__ = __doc__ # NOTE: '__doc__' is fetched directly, this is mostly to be informative


def main(input_file, output_dir):
# type: (argparse.FileType, str) -> None
LOGGER.info(
"Got arguments: input_file=%s output_dir=%s", input_file, output_dir
)
output_data = {"output": [input_file]}
json.dump(output_data, open(os.path.join(output_dir, OUTPUT_CWL_JSON), "w"))
LOGGER.info("Got arguments: input_file=%s output_dir=%s", input_file, output_dir)
output_data = [input_file]
json.dump(output_data, open(os.path.join(output_dir, "output.json"), "w"))


if __name__ == "__main__":
LOGGER.info("Parsing inputs of '%s' process.", PACKAGE_NAME)
PARSER = argparse.ArgumentParser(description=__doc__)
PARSER.add_argument("-i", help="CWL File")
PARSER.add_argument(
"-o",
metavar="outdir",
required=True,
help="Output directory of the retrieved NetCDF files extracted by name from the JSON file.",
)
PARSER.add_argument("-i", required=True, help="Input file reference.")
PARSER.add_argument("-o", required=True, help="Output directory where to generate the JSON file with input file.")
ARGS = PARSER.parse_args()
sys.exit(main(ARGS.i, ARGS.o))
1 change: 0 additions & 1 deletion weaver/processes/builtin/jsonarray2netcdf.cwl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: python
arguments:
- "${WEAVER_ROOT_DIR}/weaver/processes/builtin/jsonarray2netcdf.py"
Expand Down
1 change: 0 additions & 1 deletion weaver/processes/builtin/metalink2netcdf.cwl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
# target the installed python pointing to weaver conda env to allow imports
baseCommand: python
arguments: ["${WEAVER_ROOT_DIR}/weaver/processes/builtin/metalink2netcdf.py", "-o", $(runtime.outdir)]
inputs:
Expand Down
2 changes: 1 addition & 1 deletion weaver/processes/wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,7 +720,7 @@ def setup_loggers(self, log_stdout_stderr=True):
log_file_handler.setFormatter(log_file_formatter)

# prepare package logger
self.logger = logging.getLogger("{}.{}".format(LOGGER.name, self.package_id))
self.logger = logging.getLogger("{}|{}".format(LOGGER.name, self.package_id))
self.logger.addHandler(log_file_handler)
self.logger.setLevel(self.log_level)

Expand Down