Apply inline suggestions from code review

pmhalvor · Oct 5, 2024 · b4c6a23
1 parent 50109f2
commit b4c6a23
Show file tree

Hide file tree

Showing 5 changed files with 6 additions and 9 deletions.
diff --git a/src/config/common.yaml b/src/config/common.yaml
@@ -5,7 +5,7 @@ pipeline:
     show_plots: false
     is_local: false
 
-    # gcp
+    # gcp - bigquery
     project: "bioacoustics-2024"
     dataset_id: "whale_speech"
 

diff --git a/src/config/local.yaml b/src/config/local.yaml
@@ -3,7 +3,7 @@ pipeline:
     verbose: true
     debug: true
     show_plots: false
-    # is_local: true
+    is_local: true
 
   search:
     export_template: "data/encounters/{filename}-{timeframe}.csv"

diff --git a/src/model_server.py b/src/model_server.py
@@ -10,8 +10,7 @@
 
 # Load the TensorFlow model
 logging.info("Loading model...")
-model = hub.load("https://www.kaggle.com/models/google/humpback-whale/TensorFlow2/humpback-whale/1")
-# model = hub.load("https://tfhub.dev/google/humpback_whale/1")
+model = hub.load(config.classify.model_uri)
 score_fn = model.signatures["score"]
 logging.info("Model loaded.")
 

diff --git a/src/stages/classify.py b/src/stages/classify.py
@@ -32,7 +32,7 @@ def __init__(self, config: SimpleNamespace):
 
         self.batch_duration     = config.classify.batch_duration
         self.model_sample_rate  = config.classify.model_sample_rate
-        self.inference_url          = config.classify.inference_url
+        self.inference_url      = config.classify.inference_url
 
         # plotting parameters
         self.hydrophone_sensitivity = config.classify.hydrophone_sensitivity

diff --git a/src/stages/postprocess.py b/src/stages/postprocess.py
@@ -4,14 +4,12 @@
 import pandas as pd
 import os 
 
-# from google.cloud import bigquery
 from apache_beam.io.gcp.internal.clients import bigquery
 
 from typing import Dict, Any, Tuple
 from types import SimpleNamespace
 
 
-# class PostprocessLabels(beam.PTransform):
 class PostprocessLabels(beam.DoFn):
     def __init__(self, config: SimpleNamespace):
         self.config = config
@@ -30,7 +28,7 @@ def process(self, element, search_output):
         # convert element to dataframe
         classifications_df = self._build_classification_df(element)
 
-        # convert search_output to dataframe
+        # clean up search_output dataframe
         search_output_df = self._build_search_output_df(search_output)
 
         # join dataframes
@@ -73,7 +71,6 @@ def _build_search_output_df(self, search_output: Dict[str, Any]) -> pd.DataFrame
         search_output = search_output.rename(columns={"id": "encounter_id"})
         search_output["encounter_id"] = search_output["encounter_id"].astype(str)
         search_output = search_output[[
-            # TODO refactor to confing
             "encounter_id",
             "latitude",
             "longitude",
@@ -180,6 +177,7 @@ def _write_local(self, element):
         element_df = pd.DataFrame(element, columns=self.columns)
         final_df = pd.concat([stored_df, element_df], ignore_index=True)
         final_df = final_df.drop_duplicates()
+        logging.debug(f"Appending df to {self.output_path} \n{final_df}")
 
         # store as json (hack: to remove \/\/ escapes)
         final_df_json = final_df.to_json(orient="records").replace("\\/", "/")