Skip to content

Commit

Permalink
feat: correctly handle new redshift table format
Browse files Browse the repository at this point in the history
  • Loading branch information
dbirman committed Nov 8, 2024
1 parent 2bc1543 commit c2f1db7
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 284 deletions.
31 changes: 13 additions & 18 deletions src/aind_metadata_viz/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,19 @@
import param
import os
import numpy as np

import time
from io import StringIO

from aind_data_schema_models.modalities import (
Modality,
ExpectedFiles,
FileRequirement,
)
from aind_metadata_viz.metadata_helpers import (
process_record_list,
)
from aind_metadata_viz.metadata_class_map import (
first_layer_field_mapping,
second_layer_field_mappings,
)
from aind_metadata_viz.utils import METASTATE_MAP

API_GATEWAY_HOST = os.getenv("API_GATEWAY_HOST", "api.allenneuraldynamics-test.org")
DATABASE = os.getenv("DATABASE", "metadata_index")
Expand Down Expand Up @@ -81,8 +79,12 @@ def __init__(
):
"""Initialize"""
# get data
self._file_data = _get_file_presence(test_mode=test_mode)
start = time.time()
self._file_data = _get_metadata(test_mode=test_mode)
print(time.time() - start)
start = time.time()
self._status_data = _get_status()
print(time.time() - start)

# inner join only keeps records that are in both dataframes
self.data = pd.merge(self._file_data, self._status_data, on="_id", how="inner")
Expand Down Expand Up @@ -278,29 +280,22 @@ def get_csv(self, vp_state: str = "Not Valid/Present"):
sio = StringIO()
df.to_csv(sio, index=False)
return sio.getvalue()


@pn.cache(ttl=CACHE_RESET_DAY)
def _get_metadata(test_mode=False) -> pd.DataFrame:
"""Get the metadata fields, modality, derived, name, location, created
Parameters
----------
test_mode : bool, optional
_description_, by default False
"""


def _get_status() -> pd.DataFrame:
    """Read the status table and translate its state codes into labels.

    Returns
    -------
    pd.DataFrame
        The table read from ``RDS_TABLE_NAME`` through ``rds_client``, with
        every value that matches a key of ``METASTATE_MAP`` replaced by the
        mapped string.
    """
    status_table = rds_client.read_table(RDS_TABLE_NAME)

    # substitution happens in place, so the object that was read is the one
    # handed back to the caller
    status_table.replace(METASTATE_MAP, inplace=True)
    return status_table


@pn.cache(ttl=CACHE_RESET_DAY)
def _get_file_presence(test_mode=False) -> pd.DataFrame:
"""Get all and convert to data frame format
def _get_metadata(test_mode=False) -> pd.DataFrame:
"""Get metadata about records in DocDB
Parameters
----------
Expand Down
177 changes: 0 additions & 177 deletions src/aind_metadata_viz/metadata_helpers.py

This file was deleted.

23 changes: 17 additions & 6 deletions src/aind_metadata_viz/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,23 @@
]


class MetaState(str, Enum):
    """Metadata state labels, stored as lowercase strings.

    The ``str`` mixin makes each member usable wherever a plain string is
    expected (e.g. ``MetaState.VALID == "valid"``).
    """

    VALID = "valid"
    PRESENT = "present"
    OPTIONAL = "optional"
    MISSING = "missing"
    EXCLUDED = "excluded"
class MetadataState(int, Enum):
    """Integer codes describing the state of a piece of metadata.

    The ``int`` mixin lets members compare equal to their raw code
    (e.g. ``MetadataState.VALID == 2``).
    """

    VALID = 2  # validates as its class
    PRESENT = 1  # present
    OPTIONAL = 0  # missing, but it's optional
    MISSING = -1  # missing, and it's required
    EXCLUDED = -2  # excluded for all modalities in the metadata
    CORRUPT = -3  # corrupt, can't be loaded from json


# int -> lowercase label lookup, e.g. {2: "valid", ..., -3: "corrupt"}.
# Built from MetadataState so the codes are declared in exactly one place and
# the table cannot drift out of sync with the enum.
METASTATE_MAP = {state.value: state.name.lower() for state in MetadataState}


REMAPS = {
Expand Down
83 changes: 0 additions & 83 deletions tests/test_metadata_helpers.py

This file was deleted.

0 comments on commit c2f1db7

Please sign in to comment.