Merging develop into Master as v1.1 (#10)

* Updated README.md * Common extractor - Initial Cut (#1) * Adding common image code * Minor changes, pylint-ing * Optimizing Dockerfile * Added/updated READMEs * Added some more configuration * Added method * Improvements to documentation and some fixes (#2) * Adding common image code * Minor changes, pylint-ing * Optimizing Dockerfile * Added/updated READMEs * Added some more configuration * Added method * pylint checks * Debugging * Updated README * Added args to instance * Added contributors * Updating readme and pylint * Updating readme and pylint (#4) * Adding geo referenced image support (#6) * Added RGB plot base code for plot level template (#7) * Adding geo referenced image support * Adding rgb plot level base code * Removing common source due to incompatibility * Basic RGB plot level transformer * Removed references to Drone Pipeline * Fixing timestamp issue (#8)
AgPipeline · Jan 13, 2020 · 5e9f29f · 5e9f29f
1 parent 5873e6c
commit 5e9f29f
Show file tree

Hide file tree

Showing 10 changed files with 1,030 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# pycharm
+.idea
diff --git a/.gitmodules b/.gitmodules
diff --git a/common-image/Dockerfile b/common-image/Dockerfile
@@ -1,4 +1,4 @@
-FROM agpipeline/base-image:1.0
+FROM agpipeline/base-image:1.1
 LABEL maintainer="Chris Schnaufer <[email protected]>"
 
 # Build environment values

diff --git a/common-image/transformer_class.py b/common-image/transformer_class.py
@@ -9,6 +9,8 @@
                                 get_season_and_experiment as tr_get_season_and_experiment, \
                                 get_extractor_metadata as tr_get_extractor_metadata
 from terrautils.sensors import Sensors
+from terrautils.imagefile import get_epsg as tr_get_epsg, \
+                                 image_get_geobounds as tr_image_get_geobounds
 import terrautils.lemnatec
 
 import configuration
@@ -42,15 +44,28 @@ def get_metadata_timestamp(metadata: dict) -> str:
 
         return timestamp
 
+    @staticmethod
+    def get_datestamp(timestamp: str) -> str:
+        """Returns the date of the timestamp
+        Arguments:
+            timestamp: assumed to be in ISO 8601 format
+        Return:
+            Returns the found date. If a non-ISO 8601 formatted timestamp is specified, the entire source timestamp
+            is returned.
+        """
+        if 'T' in timestamp:
+            return timestamp.split('T')[0]
+        return timestamp
+
 class Transformer():
     """Generic class for supporting transformers
     """
-    #pylint: disable=unused-argument
     def __init__(self, **kwargs):
         """Performs initialization of class instance
         Arguments:
             kwargs: additional parameters passed in to Transformer
         """
+        # pylint: disable=unused-argument
         self.sensor = None
         self.args = None
 
@@ -66,12 +81,41 @@ def sensor_name(self):
         """
         return configuration.TRANSFORMER_SENSOR
 
-    # pylint: disable=no-self-use
+    @property
+    def supported_image_file_exts(self):
+        """Returns the list of supported image file extension strings (in lower case)
+        """
+        return ['tif', 'tiff', 'jpg']
+
+    def get_image_file_epsg(self, source_path: str) -> str:
+        """Returns the EPSG of the georeferenced image file
+        Arguments:
+            source_path: the path to the image to load the EPSG code from
+        Return:
+            Returns the EPSG code loaded from the file. None is returned if there is a problem or the file
+            doesn't have an EPSG code
+        """
+        # pylint: disable=no-self-use
+        return tr_get_epsg(source_path)
+
+    def get_image_file_geobounds(self, source_path: str) -> list:
+        """Uses gdal functionality to retrieve rectilinear boundaries from the file
+        Args:
+            source_path(str): path of the file to get the boundaries from
+        Returns:
+            The upper-left and calculated lower-right boundaries of the image in a list upon success.
+            The values are returned in following order: min_y, max_y, min_x, max_x. A list of numpy.nan
+            is returned if the boundaries can't be determined
+        """
+        # pylint: disable=no-self-use
+        return tr_image_get_geobounds(source_path)
+
     def generate_transformer_md(self) -> dict:
         """Generates metadata about this transformer
         Returns:
             Returns the transformer metadata
         """
+        # pylint: disable=no-self-use
         return {
             'version': configuration.TRANSFORMER_VERSION,
             'name': configuration.TRANSFORMER_NAME,
@@ -80,31 +124,32 @@ def generate_transformer_md(self) -> dict:
             'repository': {'repUrl': configuration.REPOSITORY}
         }
 
-    # pylint: disable=no-self-use
     def add_parameters(self, parser: argparse.ArgumentParser) -> None:
         """Adds processing parameters to existing parameters
         Arguments:
             parser: instance of argparse
         """
+        # pylint: disable=no-self-use
         parser.add_argument('--logging', '-l', nargs='?', default=os.getenv("LOGGING"),
                             help='file or url or logging configuration (default=None)')
 
         parser.epilog = configuration.TRANSFORMER_NAME + ' version ' + configuration.TRANSFORMER_VERSION + \
                         ' author ' + configuration.AUTHOR_NAME + ' ' + configuration.AUTHOR_EMAIL
 
-    #pylint: disable=no-self-use
-    def get_transformer_params(self, args: argparse.Namespace, metadata: dict) -> dict:
+    def get_transformer_params(self, args: argparse.Namespace, metadata_list: list) -> dict:
         """Returns a parameter list for processing data
         Arguments:
             args: result of calling argparse.parse_args
             metadata_list: the list of loaded metadata; only the first entry is used
         """
+        # pylint: disable=no-self-use
         # Setup logging
         pyc_setup_logging(args.logging)
 
         self.args = args
 
         # Determine if we're using JSONLD (which we should be)
+        metadata = metadata_list[0]
         if 'content' in metadata:
             parse_md = metadata['content']
         else:
@@ -122,7 +167,8 @@ def get_transformer_params(self, args: argparse.Namespace, metadata: dict) -> di
 
         # Fetch experiment name from terra metadata
         season_name, experiment_name, updated_experiment = \
-                                    tr_get_season_and_experiment(timestamp, configuration.TRANSFORMER_TYPE, terraref_md)
+                                    tr_get_season_and_experiment(__internal__.get_datestamp(timestamp),
+                                                                 configuration.TRANSFORMER_TYPE, terraref_md)
 
         # Setup our sensor
         self.sensor = Sensors(base='', station='ua-mac', sensor=configuration.TRANSFORMER_SENSOR)
@@ -155,5 +201,5 @@ def get_transformer_params(self, args: argparse.Namespace, metadata: dict) -> di
 
         return {'check_md': check_md,
                 'transformer_md': tr_get_extractor_metadata(terraref_md, configuration.TRANSFORMER_NAME),
-                'full_md': parse_md
+                'full_md': [parse_md]
                }
diff --git a/rgb-plot-base-transformer/Dockerfile b/rgb-plot-base-transformer/Dockerfile
@@ -0,0 +1,31 @@
+FROM agpipeline/gantry-base-image:latest
+LABEL maintainer="Chris Schnaufer <[email protected]>"
+
+COPY requirements.txt packages.txt /home/extractor/
+
+USER root
+
+RUN [ -s /home/extractor/packages.txt ] && \
+    (echo 'Installing packages' && \
+        apt-get update && \
+        cat /home/extractor/packages.txt | xargs apt-get install -y --no-install-recommends && \
+        rm /home/extractor/packages.txt && \
+        apt-get autoremove -y && \
+        apt-get clean && \
+        rm -rf /var/lib/apt/lists/*) || \
+    (echo 'No packages to install' && \
+        rm /home/extractor/packages.txt)
+
+RUN [ -s /home/extractor/requirements.txt ] && \
+    (echo "Install python modules" && \
+    python -m pip install -U --no-cache-dir pip && \
+    python -m pip install --no-cache-dir setuptools && \
+    python -m pip install --no-cache-dir -r /home/extractor/requirements.txt && \
+    rm /home/extractor/requirements.txt) || \
+    (echo "No python modules to install" && \
+    rm /home/extractor/requirements.txt)
+
+USER extractor
+
+COPY *.py /home/extractor/
+
diff --git a/rgb-plot-base-transformer/README.md b/rgb-plot-base-transformer/README.md
@@ -0,0 +1,31 @@
+# Transformer: base RGB Plot-level
+
+Provides the base image, or code, for plot-level RGB transformers for the UA Gantry Makeflow environment.
+
+The motivation behind this code is to significantly reduce the overhead in knowledge and work needed to add scientific algorithms to the pipeline.
+
+##  What's provided
+The transformer creates output CSV files in single-process or multi-process environments.
+If the output CSV files don't exist, they are created and initialized (the CSV header is written identifying the fields).
+If the output CSV files already exist, rows are appended to the files.
+No checks are made to determine if a particular entry already exists in the CSV files; data is simply appended.
+
+By default, a generic CSV file is produced, as well as CSV files compatible with [TERRA REF Geostreams](https://docs.terraref.org/user-manual/data-products/environmental-conditions) and with [BETYdb](https://www.betydb.org/).
+
+### Changing default CSV behavior
+Algorithm writers can override the default writing of the TERRA REF Geostreams and BETYdb CSV files by defining variables in their implementation file.
+* WRITE_GEOSTREAMS_CSV - if defined at the global level and set to `False` will suppress writing TERRA REF Geostreams CSV data for an algorithm.
+* WRITE_BETYDB_CSV - if defined at the global level and set to `False` will suppress writing BETYdb CSV data for an algorithm.
+
+In case people executing an algorithm wish to generate BETYdb or TERRA REF Geostreams CSV files, there are command-line arguments that override the global variable values mentioned above to force writing.
+Of course, these command line arguments are not necessary if the files are being written by default.
+
+### Output path
+The `--csv_path` parameter is key to getting multiple instances of RGB plot-level transformers writing to the same file.
+For each instance of the same transformer that's run (either single- or multi-process), using the same path indicates that the produced data should be appended to the CSV files (dependent upon runtime environments).
+Of course, if the file doesn't already exist it's first created and the CSV header written before data is written.
+
+If writing all the data to the same file isn't possible or isn't desirable, this parameter can be modified to allow each instance to write its own file (including the CSV header).
+
+Note: if using Docker images this path is relative to the code running inside the container.
+
diff --git a/rgb-plot-base-transformer/configuration.py b/rgb-plot-base-transformer/configuration.py
@@ -0,0 +1,29 @@
+"""Contains transformer configuration information
+"""
+
+# The version number of the transformer
+TRANSFORMER_VERSION = '1.0'
+
+# The transformer description
+TRANSFORMER_DESCRIPTION = 'Base for plot-level RGB-based algorithm transformers'
+
+# Short name of the transformer
+TRANSFORMER_NAME = 'rgb-plot-level-base'
+
+# The sensor associated with the transformer
+TRANSFORMER_SENSOR = 'stereoTop'
+
+# The transformer type (eg: 'rgbmask', 'plotclipper')
+TRANSFORMER_TYPE = 'rgb.algorithm.base'
+
+# The name of the author of the extractor
+AUTHOR_NAME = 'Chris Schnaufer'
+
+# The email of the author of the extractor
+AUTHOR_EMAIL = '[email protected]'
+
+# Contributors to this transformer
+CONTRUBUTORS = []
+
+# Repository URI of where the source code lives
+REPOSITORY = 'https://github.com/AgPipeline/ua-gantry-environment'
diff --git a/rgb-plot-base-transformer/packages.txt b/rgb-plot-base-transformer/packages.txt
diff --git a/rgb-plot-base-transformer/requirements.txt b/rgb-plot-base-transformer/requirements.txt
-Original file line number
+Diff line change
@@ Expand Up / @@ -102,3 +102,6 @@ venv.bak/ @@
     # mypy
     .mypy_cache/
+    # pycharm
+    .idea