Merge branch 'devel'
cdbethune committed Aug 27, 2024
2 parents 4c21745 + b9310aa commit 1efd51f
Showing 29 changed files with 314 additions and 155 deletions.
28 changes: 16 additions & 12 deletions Makefile
@@ -4,6 +4,10 @@ SEGMENTATION_MODEL ?=
 # The path of the points model to use
 POINTS_MODEL ?=
 
+# comma-separated list of platforms to build images for - example: linux/amd64,linux/arm64
+# leaving unset will build for the host platform
+PLATFORMS ?=
+
 # Tag used for development images
 DEV_TAG := test
 
@@ -18,38 +22,38 @@ IMAGE_NAMES := lara-cdr lara-georef lara-point-extract lara-segmentation lara-me
 # Target: build_segmentation
 # Description: Builds the segmentation component.
 build_segmentation:
-	@echo "\n*** Building segmentation ***"
-	@cd pipelines/segmentation/deploy && ./build.sh $(SEGMENTATION_MODEL)
+	@echo "*** Building segmentation ***\n"
+	@cd pipelines/segmentation/deploy && ./build.sh $(SEGMENTATION_MODEL) $(PLATFORMS)
 
 # Target: build_metadata
 # Description: Builds the metadata extraction component.
 build_metadata:
-	@echo "\n*** Building metadata extraction...\n"
-	@cd pipelines/metadata_extraction/deploy && ./build.sh $(SEGMENTATION_MODEL)
+	@echo "*** Building metadata extraction ***\n"
+	@cd pipelines/metadata_extraction/deploy && ./build.sh $(SEGMENTATION_MODEL) $(PLATFORMS)
 
 # Target: build_points
 # Description: Builds the point extraction component.
 build_points:
-	@echo "\n*** Building point extraction ***"
-	@cd pipelines/point_extraction/deploy && ./build.sh $(POINTS_MODEL) $(SEGMENTATION_MODEL)
+	@echo "*** Building point extraction ***\n"
+	@cd pipelines/point_extraction/deploy && ./build.sh $(POINTS_MODEL) $(SEGMENTATION_MODEL) $(PLATFORMS)
 
 # Target: build_georef
 # Description: Builds the georeferencing component.
 build_georef:
-	@echo "\n*** Building georeferencing ***"
-	@cd pipelines/geo_referencing/deploy && ./build.sh $(SEGMENTATION_MODEL)
+	@echo "*** Building georeferencing ***\n"
+	@cd pipelines/geo_referencing/deploy && ./build.sh $(SEGMENTATION_MODEL) $(PLATFORMS)
 
 # Target: build_text
 # Description: Builds the text extraction component.
 build_text:
-	@echo "\n*** Building text extraction ***"
-	@cd pipelines/text_extraction/deploy && ./build.sh
+	@echo "*** Building text extraction ***\n"
+	@cd pipelines/text_extraction/deploy && ./build.sh $(PLATFORMS)
 
 # Target: build_cdr
 # Description: Builds the CDR mediator component.
 build_cdr:
-	@echo "\n*** Building CDR mediator ***"
-	@cd cdr/deploy && ./build.sh
+	@echo "*** Building CDR mediator ***\n"
+	@cd cdr/deploy && ./build.sh $(PLATFORMS)
 
 # Target: build
 # Description: Builds all components.
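As a usage sketch of the new variable (the weights paths below are illustrative placeholders, not paths from this repository), `PLATFORMS` is passed alongside the existing model variables; per the comment above, a comma-separated list is also accepted:

```console
# cross-build every component for a single target architecture
make build SEGMENTATION_MODEL=~/models/segmentation_weights \
           POINTS_MODEL=~/models/points.pt \
           PLATFORMS=linux/arm64

# with PLATFORMS unset, images are built for the host platform
make build_text
```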
6 changes: 6 additions & 0 deletions README.md
@@ -13,6 +13,12 @@ This repository contains five pipelines:
 * [Georeferencing](pipelines/geo_referencing/README.md) - computes an image space to geo space transform given an input map image
 * [Text Extraction](pipelines/text_extraction/README.md) - extracts text as individual words, lines or paragraphs/blocks from an input image
 
+### Development
+
+The `tasks` directory contains a `pip`-installable library of tasks and supporting utilities; each pipeline in the `pipelines` directory is composed of these tasks. Each pipeline is itself `pip` installable, and is accompanied by a wrapper to support command line execution (`run_pipeline.py`) and a server wrapper to support execution as a REST service (`run_server.py`). Scripts to build the server wrapper into a Docker container are also included.
+
+A [Makefile](./Makefile) is also available to handle building and deploying Docker containers for the various LARA pipelines.
+
 ### Deployment
 
 The full LARA stack that supports integration with the CriticalMAAS CDR can be deployed via `docker compose`. See [instructions](deploy/README.md).
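A minimal sketch of the development flow the new README section describes — assuming the `tasks` directory and each pipeline directory hold standard `pip`-installable packages; the exact package layout and CLI flags may differ per pipeline:

```console
# install the shared task library and one pipeline in editable mode (assumed layout)
pip install -e tasks
pip install -e pipelines/text_extraction

# run a pipeline from the command line or as a REST service (flags are illustrative)
python pipelines/text_extraction/run_pipeline.py --help
python pipelines/text_extraction/run_server.py --help
```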
12 changes: 9 additions & 3 deletions cdr/deploy/build.sh
@@ -8,11 +8,17 @@ cp -r ../../schema .
 cp -r ../../tasks .
 cp -r ../../util .
 
-# run the build
-docker build -t uncharted/lara-cdr:latest .
+# run the build with the platform argument if provided, otherwise build for the host architecture
+platform=${1:-}
+if [[ -n "$platform" ]]; then
+  echo "Platform: $platform"
+  docker buildx build --platform "$platform" -t uncharted/lara-cdr:latest . --load
+else
+  docker build -t uncharted/lara-cdr:latest .
+fi
 
 # cleanup the temp files
 rm -rf cdr
 rm -rf schema
 rm -rf tasks
-rm -rf util
\ No newline at end of file
+rm -rf util
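A usage sketch of the updated script. One caveat worth knowing: `docker buildx build --load` can only load a single-platform result into the local image store, so a comma-separated multi-platform list generally requires pushing to a registry instead:

```console
# build for the host architecture
./build.sh

# cross-build for one target platform and load it into the local Docker store
./build.sh linux/arm64
```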
21 changes: 0 additions & 21 deletions cdr/deploy/build_all.sh

This file was deleted.

20 changes: 0 additions & 20 deletions cdr/deploy/tag_containers.sh

This file was deleted.

14 changes: 0 additions & 14 deletions cdr/deploy/tag_local_container.sh

This file was deleted.

53 changes: 53 additions & 0 deletions deploy/README.md
@@ -0,0 +1,53 @@
# LARA Stack Deployment

## Prerequisites
1. A Python 3.10 environment
1. An activated NGROK account and token (see <https://ngrok.com/docs/getting-started/>)
1. An OpenAI API key
1. A Google Cloud Vision API JSON key *file*
1. A CriticalMAAS CDR API key (contact Jataware)

## Setup

Install the `jinja2-cli` tool:
```console
pip install jinja2-cli
```

Make a copy of the `vars_example.json` file found in this directory and update it with the information specific to your environment.

```console
cp vars_example.json deploy_vars.json
```

The fields are:

* `work_dir`: A directory on the deployment host system that will store intermediate pipeline outputs
* `image_dir`: A directory on the deployment host system that will store COGs fetched from the CDR
* `cdr_api_token`: A CDR API token string provided by Jataware
* `ngrok_authtoken`: NGROK auth token string
* `openai_api_key`: OpenAI API key string
* `google_application_credentials`: The path to the Google Cloud Vision API JSON key file
* `tag`: The Docker tag of the LARA images to deploy (e.g. `latest`)

Generate a docker compose file from your variables:
```console
./gen_compose.sh deploy_vars.json
```

This should create a new `docker-compose.yml` file with values derived from the `deploy_vars.json` file. The LARA containers can now be pulled by running:
```console
docker compose pull
```

The system can be started by running:
```console
docker compose up
```

and stopped by running:
```console
docker compose stop
```

Once running, the system will respond to maps being added to the CDR by executing the LARA pipelines and uploading the results.
21 changes: 12 additions & 9 deletions deploy/docker-compose.j2
@@ -22,7 +22,7 @@ services:
       - CDR_API_TOKEN={{ cdr_api_token }}
       - NGROK_AUTHTOKEN={{ ngrok_authtoken }}
     volumes:
-      - {{ cdr_workdir }}:/workdir
+      - {{ work_dir }}:/workdir
       - {{ image_dir }}:/imagedir
     depends_on:
       rabbitmq:
@@ -33,13 +33,13 @@
 
   georef:
     image: uncharted/lara-georef:{{ tag }}
-    command: ["--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", {{ segmentation_model_weights }}, "--result_queue", "lara_result_queue"]
+    command: ["--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", "pipelines/segmentation_weights", "--result_queue", "lara_result_queue"]
     environment:
       - OPENAI_API_KEY={{ openai_api_key }}
       - GOOGLE_APPLICATION_CREDENTIALS=/credentials.json
     volumes:
       - {{ google_application_credentials }}:/credentials.json
-      - {{ georef_workdir }}:/workdir
+      - {{ work_dir }}:/workdir
       - {{ image_dir }}:/imagedir
     depends_on:
       rabbitmq:
@@ -51,12 +51,12 @@
   points:
     image: uncharted/lara-point-extract:{{ tag }}
     command: [
-      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq","--model_point_extractor", {{ point_model_weights }}, "--model_segmenter", {{ segmentation_model_weights }}, "--result_queue", "lara_result_queue"]
+      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq","--model_point_extractor", "pipelines/point_extraction_weights/points.pt", "--model_segmenter", "pipelines/segmentation_weights", "--result_queue", "lara_result_queue"]
     environment:
       - GOOGLE_APPLICATION_CREDENTIALS=/credentials.json
     volumes:
       - {{ google_application_credentials }}:/credentials.json
-      - {{ point_extract_workdir }}:/workdir
+      - {{ work_dir }}:/workdir
       - {{ image_dir }}:/imagedir
     depends_on:
       rabbitmq:
@@ -68,9 +68,12 @@
   segmentation:
     image: uncharted/lara-segmentation:{{ tag }}
     command: [
-      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", {{ segmentation_model_weights }}, "--result_queue", "lara_result_queue"]
+      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", "pipelines/segmentation_weights", "--result_queue", "lara_result_queue"]
+    environment:
+      - GOOGLE_APPLICATION_CREDENTIALS=/credentials.json
     volumes:
-      - {{ segmentation_workdir }}:/workdir
+      - {{ google_application_credentials }}:/credentials.json
+      - {{ work_dir }}:/workdir
       - {{ image_dir }}:/imagedir
     depends_on:
       rabbitmq:
@@ -82,13 +85,13 @@
   metadata:
     image: uncharted/lara-metadata-extract:{{ tag }}
     command: [
-      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", {{ segmentation_model_weights }}, "--result_queue", "lara_result_queue"]
+      "--workdir", "/workdir", "--imagedir", "/imagedir", "--rabbit_host", "rabbitmq", "--model", "pipelines/segmentation_weights", "--result_queue", "lara_result_queue"]
     environment:
       - OPENAI_API_KEY={{ openai_api_key }}
       - GOOGLE_APPLICATION_CREDENTIALS=/credentials.json
     volumes:
       - {{ google_application_credentials }}:/credentials.json
-      - {{ metadata_workdir }}:/workdir
+      - {{ work_dir }}:/workdir
       - {{ image_dir }}:/imagedir
     depends_on:
       rabbitmq:
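Since every service now mounts the same `work_dir` and `image_dir`, the rendered output is easy to sanity-check before starting anything — `docker compose config` validates the generated file and prints it with values resolved:

```console
# render the template (see gen_compose.sh below), then validate the result
./gen_compose.sh deploy_vars.json
docker compose -f docker-compose.yml config
```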
2 changes: 1 addition & 1 deletion deploy/gen_compose.sh
@@ -3,4 +3,4 @@
 # args: $1 - path to JSON file containing the Jinja template variable values - see `vars_example.json` for
 # the file structure.
 
-jinja --data $1 --format json --output docker-compose.yml docker-compose.j2
+jinja2 --format json --outfile docker-compose.yml docker-compose.j2 $1
20 changes: 7 additions & 13 deletions deploy/vars_example.json
@@ -1,15 +1,9 @@
 {
-    "cdr_api_token": "",
-    "cdr_workdir": "",
-    "georef_workdir": "",
-    "point_extract_workdir": "",
-    "segmentation_workdir": "",
-    "metadata_workdir": "",
-    "image_dir": "",
-    "ngrok_authtoken": "",
-    "openai_api_key": "",
-    "google_application_credentials": "",
-    "segmentation_model_weights": "",
-    "point_model_weights": "",
-    "tag": ""
+    "work_dir": "~/lara/working",
+    "image_dir": "~/lara/images",
+    "cdr_api_token": "3b8df2be0b494c50ab3562b0c429bb58",
+    "ngrok_authtoken": "585d02d615e74454833baa5533c6ed33",
+    "openai_api_key": "fee7e90ea27b49ef9b9fcb49c3ee4ceb",
+    "google_application_credentials": "~/lara/credentials/google_vision_api_key.json",
+    "tag": "latest"
 }
12 changes: 9 additions & 3 deletions pipelines/geo_referencing/deploy/build.sh
@@ -19,11 +19,17 @@ then
 else
   segment_model=$1
   echo "Segment model weights dir: $segment_model"
-  cp -r $segment_model pipelines/segmentation_weights
+  cp -r $segment_model/* pipelines/segmentation_weights
 fi
 
-# run the build
-docker buildx build --platform linux/amd64,linux/arm64 -t uncharted/lara-georef:latest .
+# run the build with the platform argument if provided, otherwise build for the host architecture
+platform=${2:-}
+if [[ -n "$platform" ]]; then
+  echo "Platform: $platform"
+  docker buildx build --platform "$platform" -t uncharted/lara-georef:latest . --load
+else
+  docker build -t uncharted/lara-georef:latest .
+fi
 
 # cleanup the temp files
 rm -rf pipelines
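The pipeline build scripts that take model weights now share one calling convention: `$1` is the segmentation weights directory and `$2` is the optional platform. A hedged example (the weights path is a placeholder):

```console
# host-architecture build using local segmentation weights
./build.sh ~/models/segmentation_weights

# single-platform cross-build
./build.sh ~/models/segmentation_weights linux/amd64
```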
8 changes: 4 additions & 4 deletions pipelines/geo_referencing/factory.py
@@ -242,10 +242,10 @@ def create_geo_referencing_pipelines(
         )
     )
     tasks.append(GeoFencer("geofence"))
-    tasks.append(GeoCoordinatesExtractor("third"))
-    tasks.append(ROIFilter("roiness"))
-    tasks.append(DistinctDegreeOutlierFilter("uniqueness"))
-    tasks.append(HighQualityCoordinateFilter("goodness"))
+    tasks.append(GeoCoordinatesExtractor("geo_coordinates_extractor"))
+    tasks.append(ROIFilter("roi_filter"))
+    tasks.append(DistinctDegreeOutlierFilter("uniqueness_filter"))
+    tasks.append(HighQualityCoordinateFilter("quality_filter"))
     tasks.append(OutlierFilter("fourth"))
     tasks.append(NaiveFilter("fun"))
     if extract_metadata:
12 changes: 9 additions & 3 deletions pipelines/metadata_extraction/deploy/build.sh
@@ -19,11 +19,17 @@ then
 else
   segment_model=$1
   echo "Segment model weights dir: $segment_model"
-  cp -r $segment_model pipelines/segmentation_weights
+  cp -r $segment_model/* pipelines/segmentation_weights
 fi
 
-# run the build
-docker buildx build --platform linux/amd64,linux/arm64 -t uncharted/lara-metadata-extract:latest . --push
+# run the build with the platform argument if provided, otherwise build for the host architecture
+platform=${2:-}
+if [[ -n "$platform" ]]; then
+  echo "Platform: $platform"
+  docker buildx build --platform "$platform" -t uncharted/lara-metadata-extract:latest . --load
+else
+  docker build -t uncharted/lara-metadata-extract:latest .
+fi
 
 # cleanup the temp files
 rm -rf pipelines
9 changes: 5 additions & 4 deletions pipelines/metadata_extraction/metadata_extraction_pipeline.py
@@ -161,19 +161,20 @@ def create_output(self, pipeline_result: PipelineResult) -> Output:
             draw.polygon(
                 points,
                 outline="#ff497b",
-                width=1,
+                width=4,
             )
             # draw in the map region bounds
+            colors = ["#5ec04a", "#4a90e2"]
             if SEGMENTATION_OUTPUT_KEY in pipeline_result.data:
                 map_segmentation = MapSegmentation.model_validate(
                     pipeline_result.data[SEGMENTATION_OUTPUT_KEY]
                 )
-                for segment in map_segmentation.segments:
+                for idx, segment in enumerate(map_segmentation.segments):
                     points = [(point[0], point[1]) for point in segment.poly_bounds]
                     draw.polygon(
                         points,
-                        outline="#5ec04a",
-                        width=1,
+                        outline=colors[idx % len(colors)],
+                        width=8,
                     )
         return ImageOutput(
             pipeline_result.pipeline_id, pipeline_result.pipeline_name, text_image