From 7e70e54fa291df08b3d1989338953155c92f4cdb Mon Sep 17 00:00:00 2001 From: dschuck Date: Tue, 17 May 2022 00:05:47 +0000 Subject: [PATCH] Add columns and query expression for subsetting Allow user to control specific columns to include in output, as well as query expression for selecting/filtering rows. This might also allow the final output file to be small enough to load completely into memory. Also, refactor subsetting logic to improve performance. --- .markdownlint.yaml | 3 + gedi-subset/CHANGELOG.md | 15 +- gedi-subset/README.md | 27 ++- gedi-subset/algorithm_config.yaml | 6 +- gedi-subset/gedi_utils.py | 40 ++++- gedi-subset/hdf5.ipynb | 272 ++++++++++++++++++++++++++++++ gedi-subset/osx.py | 4 +- gedi-subset/subset.py | 49 ++++-- gedi-subset/subset.sh | 16 +- 9 files changed, 403 insertions(+), 29 deletions(-) create mode 100644 .markdownlint.yaml create mode 100644 gedi-subset/hdf5.ipynb diff --git a/.markdownlint.yaml b/.markdownlint.yaml new file mode 100644 index 0000000..c673096 --- /dev/null +++ b/.markdownlint.yaml @@ -0,0 +1,3 @@ +default: true +MD024: # no-duplicate-heading/no-duplicate-header + allow_different_nesting: true diff --git a/gedi-subset/CHANGELOG.md b/gedi-subset/CHANGELOG.md index 9bc4c45..cd1ad2c 100644 --- a/gedi-subset/CHANGELOG.md +++ b/gedi-subset/CHANGELOG.md @@ -7,7 +7,18 @@ variation of [Semantic Versioning], with the following difference: each version is prefixed with `gedi-subset-` (e.g., `gedi-subset-0.1.0`) to allow for distinct lines of versioning of independent work in sibling directories. -## [0.1.0] - 2022-05-26 +## [gedi-subset-0.2.0] - 2022-06-01 + +### Added + +- Added inputs `columns` and `query` to refine filtering/subsetting. See + `gedi-subset/README.md` for details. + +### Changed + +- Improved performance of subsetting/filtering logic, resulting in ~5x speedup. + +## [gedi-subset-0.1.0] - 2022-06-01 ### Added @@ -17,4 +28,4 @@ distinct lines of versioning of independent work in sibling directories. 
[Keep a Changelog]: https://keepachangelog.com/en/1.0.0/ [Semantic Versioning]: - https://semver.org/spec/v2.0.0.html \ No newline at end of file + https://semver.org/spec/v2.0.0.html diff --git a/gedi-subset/README.md b/gedi-subset/README.md index 4c10c31..e0f4590 100644 --- a/gedi-subset/README.md +++ b/gedi-subset/README.md @@ -25,9 +25,25 @@ At a high level, the GEDI subsetting algorithm does the following: To run a GEDI subsetting DPS job, you must supply the following inputs: -- `aoi`: URL to a GeoJSON file representing your area of interest +- `aoi` (**required**): URL to a GeoJSON file representing your area of interest +- `columns`: Comma-separated list of column names to include in output file. + (**Default:** + `agbd, agbd_se, l2_quality_flag, l4_quality_flag, sensitivity, sensitivity_a2`) +- `query`: Query expression for subsetting the rows in the output file. + **IMPORTANT:** The `columns` input must contain at least all of the columns + that appear in this query expression, otherwise an error will occur. + (**Default:** `l2_quality_flag == 1 and l4_quality_flag == 1 and sensitivity > + 0.95 and sensitivity_a2 > 0.95`) - `limit`: Maximum number of GEDI granule data files to download (among those - that intersect the specified AOI) + that intersect the specified AOI). (**Default:** 10,000) + +**IMPORTANT:** When supplying input values via the ADE UI, for convenience, to +accept _all_ default values, you may leave _all_ optional inputs blank. +However, if you supply a value for _any_ optional input, you must enter a dash +(`-`) as the input value for _all other_ optional inputs. This ensures that +the input values remain correctly ordered for the underlying script to which the +inputs are supplied. Otherwise, your job may fail due to invalid script +arguments, or might produce unpredictable results. 
If your AOI is a publicly available geoBoundary, see [Getting the GeoJSON URL for a geoBoundary](#getting-the-geojson-url-for-a-geoboundary) @@ -233,7 +249,7 @@ able to register the new version of the algorithm, as follows, within the ADE: 1. Pull the latest code from GitHub (to obtain merged PR, if necessary): ```bash - git pull origin + git pull origin main git checkout main ``` @@ -242,6 +258,7 @@ able to register the new version of the algorithm, as follows, within the ADE: ```bash git push --all ade + git push --tags ade ``` 1. In the ADE's File Browser, navigate to @@ -263,7 +280,9 @@ able to register the new version of the algorithm, as follows, within the ADE: Country Boundaries from: -Runfola, D. et al. (2020) geoBoundaries: A global database of political administrative boundaries. PLoS ONE 15(4): e0231866. +Runfola, D. et al. (2020) geoBoundaries: A global database of political +administrative boundaries. PLoS ONE 15(4): e0231866. + [geoBoundaries]: https://www.geoboundaries.org diff --git a/gedi-subset/algorithm_config.yaml b/gedi-subset/algorithm_config.yaml index c75f4ce..4342c84 100644 --- a/gedi-subset/algorithm_config.yaml +++ b/gedi-subset/algorithm_config.yaml @@ -1,6 +1,6 @@ description: Subset GEDI L4A granules within an area of interest (AOI) algo_name: gedi-subset -version: gedi-subset-0.1.0 +version: gedi-subset-0.2.0 environment: ubuntu repository_url: https://repo.ops.maap-project.org/data-team/maap-documentation-examples.git docker_url: mas.maap-project.org:5000/root/ade-base-images/r:latest @@ -11,5 +11,9 @@ disk_space: 20GB inputs: - name: aoi download: True + - name: columns + download: False + - name: query + download: False - name: limit download: False diff --git a/gedi-subset/gedi_utils.py b/gedi-subset/gedi_utils.py index f85b237..2382bfd 100644 --- a/gedi-subset/gedi_utils.py +++ b/gedi-subset/gedi_utils.py @@ -3,7 +3,7 @@ import os import os.path import warnings -from typing import Any, Callable, Mapping, Sequence, TypeVar, Union 
+from typing import Any, Callable, List, Mapping, Sequence, TypeVar, Union import h5py import numpy as np @@ -67,7 +67,7 @@ def df_assign(col_name: str, val: Any, df: _DF) -> _DF: @curry def append_message(extra_message: str, e: Exception) -> Exception: - message, *other_args = e.args if e.args else ("",) + message, *other_args = e.args if e.args else ("",) # pytype: disable=bad-unpacking new_message = f"{message}: {extra_message}" if message else extra_message e.args = (new_message, *other_args) @@ -177,7 +177,7 @@ def spatial_filter(beam, aoi): @curry def subset_h5( - path: Union[str, os.PathLike], aoi: gpd.GeoDataFrame, filter_cols: Sequence[str] + path: Union[str, os.PathLike], aoi: gpd.GeoDataFrame, filter_cols: Sequence[str], expr: str ) -> gpd.GeoDataFrame: """ Extract the beam data only for the aoi and only columns of interest @@ -226,10 +226,10 @@ def subset_h5( col_val.append(value[:][indices].tolist()) # create a pandas dataframe - beam_df = pd.DataFrame(map(list, zip(*col_val)), columns=col_names) + beam_df = pd.DataFrame(map(list, zip(*col_val)), columns=col_names).query(expr) # Inserting BEAM names beam_df.insert( - 0, "BEAM", np.repeat(str(v), len(beam_df.index)).tolist() + 0, "BEAM", np.repeat(v[5:], len(beam_df.index)).tolist() ) # Appending to the subset_df dataframe subset_df = pd.concat([subset_df, beam_df]) @@ -250,6 +250,36 @@ def subset_h5( return subset_gdf +def subset_hdf5( + path: str, + aoi: gpd.GeoDataFrame, + columns: Sequence[str], + expr: str, +) -> gpd.GeoDataFrame: + def subset_beam(beam: h5py.Group) -> gpd.GeoDataFrame: + def append_series(path: str, value: Union[h5py.Group, h5py.Dataset]) -> None: + if (name := path.split("/")[-1]) in columns: + series.append(pd.Series(value, name=name)) + + series: List[pd.Series] = [] + beam.visititems(append_series) + df = pd.concat(series, axis=1).query(expr) + df.insert(0, "BEAM", beam.name[5:]) + + x, y = df.lon_lowestmode, df.lat_lowestmode + df.drop(["lon_lowestmode", "lat_lowestmode"], 
axis=1, inplace=True) + gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(x, y), crs="EPSG:4326") + + return gdf[gdf.geometry.within(aoi.geometry[0])] + + with h5py.File(path) as hdf5: + beams = (value for key, value in hdf5.items() if key.startswith("BEAM")) + beam_dfs = (subset_beam(beam) for beam in beams) + beams_df = pd.concat(beam_dfs, ignore_index=True, copy=False) + + return beams_df + + def write_subset(infile, gdf): """ Write GeoDataFrame to Flatgeobuf diff --git a/gedi-subset/hdf5.ipynb b/gedi-subset/hdf5.ipynb new file mode 100644 index 0000000..fe688b5 --- /dev/null +++ b/gedi-subset/hdf5.ipynb @@ -0,0 +1,272 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 79, + "id": "collect-charlotte", + "metadata": {}, + "outputs": [], + "source": [ + "from functools import reduce\n", + "\n", + "import geopandas as gpd\n", + "import h5py\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "broken-scale", + "metadata": {}, + "outputs": [], + "source": [ + "path = '/projects/maap-documentation-examples/output/GEDI04_A_2019111040155_O02008_02_T04616_02_002_02_V002.h5'\n", + "columns = [\n", + " \"agbd\",\n", + " \"agbd_se\",\n", + " \"l2_quality_flag\",\n", + " \"l4_quality_flag\",\n", + " \"lat_lowestmode\",\n", + " \"lon_lowestmode\",\n", + " \"sensitivity\",\n", + " \"sensitivity_a2\",\n", + "]\n", + "query = \"l2_quality_flag == 1 and l4_quality_flag == 1 and sensitivity > 0.95 and sensitivity_a2 > 0.95\"" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "compatible-thompson", + "metadata": {}, + "outputs": [], + "source": [ + "def subset_h5(path, filter_cols, query):\n", + " \"\"\"\n", + " Extract the beam data only for the aoi and only columns of interest\n", + " \"\"\"\n", + " \n", + " subset_df = pd.DataFrame()\n", + " \n", + " with h5py.File(path, \"r\") as hf_in:\n", + " # loop through BEAMXXXX groups\n", + " for k, beam in ((k, v) for k, v in 
hf_in.items() if k.startswith(\"BEAM\")):\n", + " col_names = []\n", + " col_val = []\n", + "# indices = spatial_filter(beam, aoi)\n", + " items = (\n", + " (k, v)\n", + " for k, v in beam.items()\n", + " if k in filter_cols or isinstance(v, h5py.Group)\n", + " )\n", + "\n", + " for key, value in items:\n", + " # looping through subgroups\n", + " if isinstance(value, h5py.Group):\n", + " items2 = ((k2, _) for k2, _ in value.items() if k2 in filter_cols)\n", + " for key2, value2 in items2:\n", + " # xvar variables have 2D\n", + " if key2.startswith(\"xvar\"):\n", + " for r in range(4):\n", + " col_names.append(key2 + \"_\" + str(r + 1))\n", + " col_val.append(value2[(), r].tolist())\n", + " else:\n", + " col_names.append(key2)\n", + " col_val.append(value2[()])\n", + " # looping through base group\n", + " elif key.startswith(\"xvar\"):\n", + " # xvar variables have 2D\n", + " for r in range(4):\n", + " col_names.append(key + \"_\" + str(r + 1))\n", + " col_val.append(value[(), r])\n", + " else:\n", + " col_names.append(key)\n", + " col_val.append(value[()])\n", + "\n", + " # create a pandas dataframe\n", + " beam_df = pd.DataFrame(map(list, zip(*col_val)), columns=col_names)\n", + " beam_df.query(query, inplace=True)\n", + " # Inserting BEAM names\n", + " beam_df.insert(0, \"BEAM\", np.repeat(str(k), len(beam_df.index)))\n", + " # Appending to the subset_df dataframe\n", + " subset_df = pd.concat([subset_df, beam_df])\n", + " \n", + " return subset_df" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "covered-figure", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " BEAM agbd agbd_se sensitivity_a2 l2_quality_flag \\\n", + "0 BEAM0000 141.049683 17.123022 0.978424 1 \n", + "1 BEAM0000 95.756226 17.124018 0.979147 1 \n", + "2 BEAM0000 90.346252 17.123966 0.966114 1 \n", + "3 BEAM0000 113.583145 17.124689 0.977122 1 \n", + "4 BEAM0000 93.160324 17.123558 0.983254 1 \n", + "\n", + " l4_quality_flag 
lat_lowestmode lon_lowestmode sensitivity \n", + "0 1 0.097697 9.372999 0.988014 \n", + "1 1 0.098120 9.373297 0.986098 \n", + "2 1 0.098542 9.373594 0.966114 \n", + "3 1 0.098964 9.373892 0.983985 \n", + "4 1 0.099386 9.374189 0.990431 \n", + " agbd agbd_se sensitivity_a2 l2_quality_flag \\\n", + "count 290661.000000 290661.000000 290661.000000 290661.0 \n", + "mean 8.946183 3.448750 0.986348 1.0 \n", + "std 48.692471 2.441094 0.005411 0.0 \n", + "min 0.516440 2.981795 0.950001 1.0 \n", + "25% 0.870713 3.005720 0.982702 1.0 \n", + "50% 1.061925 3.007290 0.986877 1.0 \n", + "75% 1.391625 3.008365 0.990650 1.0 \n", + "max 2762.396240 17.585686 0.998231 1.0 \n", + "\n", + " l4_quality_flag lat_lowestmode lon_lowestmode sensitivity \n", + "count 290661.0 290661.000000 290661.000000 290661.000000 \n", + "mean 1.0 27.329840 35.047115 0.967601 \n", + "std 0.0 11.926019 18.554284 0.010686 \n", + "min 1.0 0.062150 9.372999 0.950000 \n", + "25% 1.0 19.848514 24.166614 0.957756 \n", + "50% 1.0 24.433955 28.213979 0.967372 \n", + "75% 1.0 31.656327 35.637879 0.977670 \n", + "max 1.0 51.818693 92.572213 0.997179 \n", + "CPU times: user 4.52 s, sys: 249 ms, total: 4.77 s\n", + "Wall time: 5.75 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "result = subset_h5(path, columns, query)\n", + "\n", + "print(result.head())\n", + "print(result.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "enhanced-bargain", + "metadata": {}, + "outputs": [], + "source": [ + "def subset_beam(beam, columns, query):\n", + " def append_series(path, value):\n", + " if (name := path.split(\"/\")[-1]) in columns:\n", + " series.append(pd.Series(value, name=name))\n", + "\n", + " series = []\n", + " beam.visititems(append_series)\n", + "\n", + " df = pd.concat(series, axis=1)\n", + " df.query(query, inplace=True)\n", + " df.insert(0, \"BEAM\", np.repeat(beam.name[5:], len(df.index)))\n", + " \n", + " return df\n", + "\n", + "\n", + "def subset_hdf5(path, columns, 
query):\n", + " with h5py.File(path) as hdf5:\n", + " beams = (value for key, value in hdf5.items() if key.startswith(\"BEAM\"))\n", + " beam_dfs = (subset_beam(beam, columns, query) for beam in beams)\n", + " beams_df = reduce(lambda df0, df1: pd.concat([df0, df1], copy=False), beam_dfs)\n", + " \n", + " return beams_df" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "another-pixel", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " BEAM agbd agbd_se sensitivity_a2 l2_quality_flag \\\n", + "0 0000 141.049683 17.123022 0.978424 1 \n", + "1 0000 95.756226 17.124018 0.979147 1 \n", + "2 0000 90.346252 17.123966 0.966114 1 \n", + "3 0000 113.583145 17.124689 0.977122 1 \n", + "4 0000 93.160324 17.123558 0.983254 1 \n", + "\n", + " l4_quality_flag lat_lowestmode lon_lowestmode sensitivity \n", + "0 1 0.097697 9.372999 0.988014 \n", + "1 1 0.098120 9.373297 0.986098 \n", + "2 1 0.098542 9.373594 0.966114 \n", + "3 1 0.098964 9.373892 0.983985 \n", + "4 1 0.099386 9.374189 0.990431 \n", + " agbd agbd_se sensitivity_a2 l2_quality_flag \\\n", + "count 290661.000000 290661.000000 290661.000000 290661.0 \n", + "mean 8.946183 3.448750 0.986348 1.0 \n", + "std 48.692471 2.441094 0.005411 0.0 \n", + "min 0.516440 2.981795 0.950001 1.0 \n", + "25% 0.870713 3.005720 0.982702 1.0 \n", + "50% 1.061925 3.007290 0.986877 1.0 \n", + "75% 1.391625 3.008365 0.990650 1.0 \n", + "max 2762.396240 17.585686 0.998231 1.0 \n", + "\n", + " l4_quality_flag lat_lowestmode lon_lowestmode sensitivity \n", + "count 290661.0 290661.000000 290661.000000 290661.000000 \n", + "mean 1.0 27.329840 35.047115 0.967601 \n", + "std 0.0 11.926019 18.554284 0.010686 \n", + "min 1.0 0.062150 9.372999 0.950000 \n", + "25% 1.0 19.848514 24.166614 0.957756 \n", + "50% 1.0 24.433955 28.213979 0.967372 \n", + "75% 1.0 31.656327 35.637879 0.977670 \n", + "max 1.0 51.818693 92.572213 0.997179 \n", + "CPU times: user 661 ms, sys: 81 ms, total: 742 
ms\n", + "Wall time: 2.01 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "result = subset_hdf5(path, columns, query)\n", + " \n", + "print(result.head())\n", + "print(result.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "plastic-russell", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/gedi-subset/osx.py b/gedi-subset/osx.py index 0a06d34..3716db0 100644 --- a/gedi-subset/osx.py +++ b/gedi-subset/osx.py @@ -12,11 +12,11 @@ import os import os.path -from typing import TypeAlias +from typing import TypeAlias, Union from returns.io import IOResultE, impure_safe -StrPath: TypeAlias = str | os.PathLike[str] +StrPath: TypeAlias = Union[str, os.PathLike[str]] exists = impure_safe(os.path.exists) diff --git a/gedi-subset/subset.py b/gedi-subset/subset.py index 9c46941..baba1ff 100755 --- a/gedi-subset/subset.py +++ b/gedi-subset/subset.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Any, Iterable, Tuple +from typing import Any, Iterable, List, Sequence, Tuple import geopandas as gpd import osx @@ -20,7 +20,7 @@ gdf_to_file, gdf_to_parquet, granule_intersects, - subset_h5, + subset_hdf5, ) from maap.maap import MAAP from maap.Result import Granule @@ -60,6 +60,8 @@ class SubsetGranuleProps: granule: Granule maap: MAAP aoi_gdf: gpd.GeoDataFrame + columns: Sequence[str] + query: str output_dir: Path @@ -77,19 +79,14 @@ def subset_granule(props: SubsetGranuleProps) -> Maybe[str]: GeoParquet file. 
""" - filter_cols = [ - "agbd", - "agbd_se", - "l4_quality_flag", - "sensitivity", - "lat_lowestmode", - "lon_lowestmode", - ] io_result = download_granule(props.maap, str(props.output_dir), props.granule) inpath = unsafe_perform_io(io_result.alt(raise_exception).unwrap()) logger.debug(f"Subsetting {inpath}") - gdf = df_assign("filename", inpath, subset_h5(inpath, props.aoi_gdf, filter_cols)) + gdf: gpd.GeoDataFrame = flow( + subset_hdf5(inpath, props.aoi_gdf, props.columns, props.query), + df_assign("filename", inpath), + ) osx.remove(inpath) if gdf.empty: @@ -115,6 +112,8 @@ def set_logging_level(logging_level: int) -> None: def subset_granules( maap: MAAP, aoi_gdf: gpd.GeoDataFrame, + columns: Sequence[str], + query: str, output_dir: Path, dest: Path, init_args: Tuple[Any, ...], @@ -142,7 +141,8 @@ def append_subset(src: str) -> IOResultE[str]: chunksize = 10 processes = os.cpu_count() payloads = ( - SubsetGranuleProps(granule, maap, aoi_gdf, output_dir) for granule in granules + SubsetGranuleProps(granule, maap, aoi_gdf, columns, query, output_dir) + for granule in granules ) logger.info(f"Subsetting on {processes} processes (chunksize={chunksize})") @@ -169,7 +169,6 @@ def main( resolve_path=True, ), doi=typer.Option( - # "10.3334/ORNLDAAC/1986", # GEDI L4A DOI, v2 "10.3334/ORNLDAAC/2056", # GEDI L4A DOI, v2.1 help="Digital Object Identifier of collection to subset (https://www.doi.org/)", ), @@ -177,6 +176,28 @@ def main( CMRHost.maap, help="CMR hostname", ), + columns: str = typer.Option( + ",".join( + [ + "agbd", + "agbd_se", + "l2_quality_flag", + "l4_quality_flag", + "lat_lowestmode", + "lon_lowestmode", + "sensitivity", + "sensitivity_a2", + ] + ), + help="Comma-separated list of columns to select", + ), + query: str = typer.Option( + "l2_quality_flag == 1" + " and l4_quality_flag == 1" + " and sensitivity > 0.95" + " and sensitivity_a2 > 0.95", + help="Boolean query expression to select rows", + ), limit: int = typer.Option( 10_000, help="Maximum number 
of granules to subset", @@ -224,6 +245,8 @@ def main( for subsets in subset_granules( maap, aoi_gdf, + [c.strip() for c in columns.split(",")], + query, output_dir, dest, (logging_level,), diff --git a/gedi-subset/subset.sh b/gedi-subset/subset.sh index 547e58a..431ed4a 100755 --- a/gedi-subset/subset.sh +++ b/gedi-subset/subset.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -set -xeuo pipefail +set -xuo pipefail basedir=$(dirname "$(readlink -f "$0")") @@ -16,7 +16,19 @@ if test -d input; then # We are executing within a DPS job, so the AOI file was automatically # downloaded to the `input` directory. aoi=$(ls input/*) - ${subset_py} --verbose --aoi "${aoi}" --limit "${1:-10000}" + + n_actual=${#} + n_expected=3 + + if test ${n_actual} -gt 0 -a ${n_actual} -ne ${n_expected}; then + echo "Expected ${n_expected} inputs, but got ${n_actual}:" $(printf " '%b'" "$@") >&2 + exit 1 + fi + + columns=$(test "${1:--}" != "-" && echo " --columns '${1:--}'") + query=$(test "${2:--}" != "-" && echo " --query '${2:--}'") + limit=$(test "${3:--}" != "-" && echo " --limit ${3:--}") + ${subset_py} --verbose --aoi "${aoi}"${columns}${query}${limit} else # This was invoked directly, so simply pass all arguments through to the # Python script.