Skip to content

Commit

Permalink
U/gblackadder/fix template issues (#6)
Browse files Browse the repository at this point in the history
* simplify writers and better support templates

* remove the dict-like set item that no one uses

* dependabot identified issue with certifi library
  • Loading branch information
gblackadder authored Nov 7, 2024
1 parent 12cdb85 commit 5c508a6
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 71 deletions.
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

107 changes: 62 additions & 45 deletions pydantic_schemas/metadata_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import importlib.metadata
from copy import copy
from typing import Dict, List, Optional, Type, Union

Expand All @@ -21,11 +22,13 @@
from .utils.quick_start import make_skeleton
from .utils.utils import merge_dicts, standardize_keys_in_dict

__version__ = importlib.metadata.version("metadataschemas")


class MetadataManager:
"""
Interface with Excel for creating, saving and updating metadata for various types:
document, indicator, indicators_db, microdata, resource, script, table, video
document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video
Retrieve pydantic model definitions for each metadata type
"""
Expand Down Expand Up @@ -85,7 +88,7 @@ def standardize_metadata_name(self, metadata_name: str) -> str:
metadata_name = "microdata"
elif metadata_name == "timeseries":
metadata_name = "indicator"
elif metadata_name == "timeseries_db":
elif metadata_name == "timeseries_db" or metadata_name == "indicator_db":
metadata_name = "indicators_db"
self._raise_if_unsupported_metadata_name(metadata_name=metadata_name)
return metadata_name
Expand All @@ -100,6 +103,43 @@ def create_metadata_outline(
skeleton_object = make_skeleton(schema, debug=debug)
return skeleton_object

def _get_name_version_schema_writer(self, metadata_name_or_class):
    """
    Resolve the name, version label, schema and Excel writer for a metadata type or a template.

    Args:
        metadata_name_or_class (str or class): either the name of a metadata type, or a class.

    Returns:
        tuple: ``(metadata_name, version, schema, writer)``
            - metadata_name (str): the standardized type name, or the template's schema title.
            - version (str): the label later passed to the writer as version information.
            - schema (type(BaseModel)): the class describing the metadata.
            - writer (function): the function used to write the Excel file.

    A string, or a class that is one of the values of ``_TYPE_TO_SCHEMA`` (the standard types:
    document, geospatial, image, indicator, indicators_db, microdata, resource, script, table,
    video), is resolved through the internal ``_TYPE_TO_SCHEMA`` / ``_TYPE_TO_WRITER`` mappings.
    Anything else is treated as a template: its name is the ``title`` of its JSON schema and it
    is written with ``write_to_single_sheet``.
    """
    requested = metadata_name_or_class
    given_as_name = isinstance(requested, str)

    # Guard clause: neither a name nor a registered schema class, so treat it as a template.
    if not given_as_name and requested not in self._TYPE_TO_SCHEMA.values():
        writer = write_to_single_sheet
        template_title = requested.model_json_schema()["title"]
        return template_title, f"Template: {template_title}", requested, writer

    if given_as_name:
        metadata_name = self.standardize_metadata_name(requested)
        schema = self._TYPE_TO_SCHEMA[metadata_name]
    else:
        # Reverse lookup: find the registered name whose schema is this exact class.
        # The loop variables deliberately stay bound after the break.
        for metadata_name, schema in self._TYPE_TO_SCHEMA.items():
            if schema is requested:
                break

    version = f"{metadata_name} type metadata version {__version__}"
    writer = self._TYPE_TO_WRITER[metadata_name]
    return metadata_name, version, schema, writer

def write_metadata_outline_to_excel(
self,
metadata_name_or_class: Union[str, Type[BaseModel]],
Expand All @@ -111,9 +151,7 @@ def write_metadata_outline_to_excel(
Args:
metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
document, indicator, indicators_db, microdata, resource, script, table, video
Currently not supported:
geospatial, image
document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video
If passed as a BaseModel type, for instance this is what you would do with a template, then the writer
defaults to a single page.
filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_name}_metadata.xlsx
Expand All @@ -125,33 +163,21 @@ def write_metadata_outline_to_excel(
Outputs:
An Excel file into which metadata can be entered
"""
if isinstance(metadata_name_or_class, str):
metadata_name = self.standardize_metadata_name(metadata_name_or_class)
# if metadata_name == "geospatial":
# raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
skeleton_object = self.create_metadata_outline(metadata_name, debug=False)
writer = self._TYPE_TO_WRITER[metadata_name]
if filename is None:
filename = f"{metadata_name}_metadata.xlsx"
if title is None:
title = f"{metadata_name.capitalize()} Metadata"
else:
skeleton_object = make_skeleton(metadata_name_or_class, debug=False)
writer = write_to_single_sheet
metadata_name = metadata_name_or_class.model_json_schema()["title"]
if filename is None:
filename = f"{metadata_name}_metadata.xlsx"
if title is None:
title = f"{metadata_name.capitalize()} Metadata"
metadata_name, version, schema, writer = self._get_name_version_schema_writer(metadata_name_or_class)
skeleton_object = self.create_metadata_outline(schema, debug=False)

if filename is None:
filename = f"{metadata_name}_metadata.xlsx"
if title is None:
title = f"{metadata_name.capitalize()} Metadata"

if not str(filename).endswith(".xlsx"):
filename += ".xlsx"
writer(filename, skeleton_object, metadata_name, title)
writer(filename, skeleton_object, version, title)
return filename

def save_metadata_to_excel(
self,
metadata_name_or_class: Union[str, Type[BaseModel]],
object: BaseModel,
filename: Optional[str] = None,
title: Optional[str] = None,
Expand All @@ -161,11 +187,6 @@ def save_metadata_to_excel(
Save an Excel document of the given metadata object.
Args:
metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
document, indicator, indicators_db, microdata, resource, script, table, video
Currently not supported:
geospatial, image
If passed as a BaseModel type, for instance this is what you would do with a template, then the writer defaults to a single page.
object (BaseModel): The pydantic object to save to the Excel file.
filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx
title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata'
Expand All @@ -176,17 +197,10 @@ def save_metadata_to_excel(
Outputs:
An Excel file containing the metadata from the pydantic object. This file can be updated as needed.
"""
if isinstance(metadata_name_or_class, str):
metadata_name = self.standardize_metadata_name(metadata_name_or_class)
# if metadata_name == "geospatial":
# raise NotImplementedError("Geospatial schema contains an infinite loop so cannot be written to excel")
schema = self.metadata_class_from_name(metadata_name)
writer = self._TYPE_TO_WRITER[metadata_name]
else:
metadata_name = metadata_name_or_class.model_json_schema()["title"]
schema = metadata_name_or_class
writer = write_to_single_sheet
skeleton_object = self.create_metadata_outline(metadata_name_or_class=metadata_name_or_class, debug=False)
metadata_name, version, schema, writer = self._get_name_version_schema_writer(
type(object)
) # metadata_name_or_class)
skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False)

if filename is None:
filename = f"{metadata_name}_metadata.xlsx"
Expand All @@ -201,7 +215,7 @@ def save_metadata_to_excel(
)
combined_dict = standardize_keys_in_dict(combined_dict)
new_ob = schema.model_validate(combined_dict)
writer(filename, new_ob, metadata_name, title, verbose=verbose)
writer(filename, new_ob, version, title, verbose=verbose)
return filename

@staticmethod
Expand All @@ -222,12 +236,15 @@ def _get_metadata_name_from_excel_file(filename: str) -> str:
workbook.close()

if not type_info or not isinstance(type_info, str):
raise ValueError(f"Cell C3 is empty or not a string. {error_message}")
raise ValueError(f"Cell C1 is empty or not a string. {error_message}")

cell_values = type_info.split(" ")

if cell_values[0] == "Template:":
return " ".join(cell_values[1:])

if len(cell_values) < 3 or cell_values[1] != "type" or cell_values[2] != "metadata":
raise ValueError(f"Cell C3 is improperly formatted. {error_message}")
raise ValueError(f"Cell C1 is improperly formatted. {error_message}")

return cell_values[0]

Expand All @@ -236,7 +253,7 @@ def read_metadata_from_excel(
) -> BaseModel:
"""
Read in metadata from an appropriately formatted Excel file as a pydantic object.
If using standard metadata types (document, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
If using standard metadata types (document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video) then there is no need to pass in the metadata_class. But if using a template, then the class must be provided.
Args:
filename (str): The path to the Excel file.
Expand Down
49 changes: 44 additions & 5 deletions pydantic_schemas/tests/test_metadata_manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import random
import string
from copy import copy
from typing import List, Optional

import pytest
from pydantic import BaseModel, ValidationError
Expand Down Expand Up @@ -162,7 +163,7 @@ def test_metadata_by_name(tmpdir, metadata_name):

# Save the read metadata to a new file
filename2 = tmpdir.join(f"test_{metadata_name}_save.xlsx")
mm.save_metadata_to_excel(metadata_name_or_class=metadata_name, object=tmp, filename=filename2, title=metadata_name)
mm.save_metadata_to_excel(object=tmp, filename=filename2, title=metadata_name)

for i in range(10):
modl = mm.create_metadata_outline(metadata_name_or_class=metadata_name)
Expand All @@ -171,9 +172,7 @@ def test_metadata_by_name(tmpdir, metadata_name):
# Write filled in metadata
filename3 = tmpdir.join(f"test_{metadata_name}_{i}.xlsx")
# filename3 = f"test_{metadata_name}_{i}.xlsx"
mm.save_metadata_to_excel(
metadata_name_or_class=metadata_name, object=modl, filename=filename3, title=metadata_name
)
mm.save_metadata_to_excel(object=modl, filename=filename3, title=metadata_name)

# Read the metadata back
actual = mm.read_metadata_from_excel(filename=filename3)
Expand All @@ -199,7 +198,7 @@ def test_metadata_by_class(tmpdir, metadata_name):
filename=tmpdir.join(f"test_class_{metadata_name}.xlsx"),
title=metadata_name,
)
mm.read_metadata_from_excel(filename=filename_class, metadata_class=metadata_class)
mm.read_metadata_from_excel(filename=filename_class)


def test_standardize_metadata_name():
Expand Down Expand Up @@ -244,3 +243,43 @@ def test_standardize_metadata_name():

with pytest.raises(ValueError):
mm.standardize_metadata_name("Bad-name")


def test_write_read_and_save_for_templates(tmpdir):
    """Round-trip a user-defined template model through the Excel writer and reader.

    Covers three steps for a nested template class that is not one of the
    standard metadata types:
      1. writing an empty outline and checking the name stored in the file,
      2. saving a populated instance and checking the stored name again,
      3. reading the saved file back and comparing it with the original object.
    """

    class Simple(BaseModel):
        a: str
        b: List[str]

    class Midlevel(BaseModel):
        c: Optional[str] = None
        d: Optional[List[Simple]]

    class TopLevel(BaseModel):
        e: Optional[Midlevel]
        f: Optional[int]

    mm = MetadataManager()

    # Step 1: an outline written from the class itself must be tagged with the template name.
    filename1 = tmpdir.join("test_templates_1.xlsx")

    mm.write_metadata_outline_to_excel(TopLevel, filename=filename1, title="Outline Test")

    assert mm._get_metadata_name_from_excel_file(filename1) == "TopLevel"

    example = TopLevel(
        e=Midlevel(
            c="c_value",
            d=[
                Simple(a="a_value", b=["the", "quick", "brown", "fox"]),
                Simple(a="a_value_2", b=["jumped", "over", "the", "lazy", "dog"]),
            ],
        ),
        f=99,
    )

    # Step 2: saving an instance needs no explicit class; the stored name must still match.
    filename2 = tmpdir.join("test_templates_2.xlsx")
    mm.save_metadata_to_excel(example, filename2)

    assert mm._get_metadata_name_from_excel_file(filename2) == "TopLevel"

    # Step 3: reading a template back requires passing the class explicitly.
    actual = mm.read_metadata_from_excel(filename2, TopLevel)
    assert actual == example
3 changes: 1 addition & 2 deletions pydantic_schemas/utils/excel_to_pydantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,15 @@
import numpy as np
import pandas as pd
from pydantic import BaseModel, create_model
from utils.pydantic_to_excel import pydantic_to_dataframe

from ..utils.pydantic_to_excel import pydantic_to_dataframe
from .quick_start import make_skeleton
from .utils import (
annotation_contains_pydantic,
get_subtype_of_optional_or_list,
is_dict_annotation,
is_list_annotation,
is_optional_annotation,
is_optional_list,
seperate_simple_from_pydantic,
standardize_keys_in_dict,
subset_pydantic_model_type,
Expand Down
11 changes: 2 additions & 9 deletions pydantic_schemas/utils/pydantic_to_excel.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import copy
import importlib.metadata
import json
import os
from enum import Enum
from typing import List, Optional, Tuple, Union, get_args

__version__ = importlib.metadata.version("metadataschemas")

import pandas as pd
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side
Expand Down Expand Up @@ -507,15 +504,12 @@ def create_sheet(workbook, sheetname, sheet_number):
return new_sheet


def write_to_single_sheet(
doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False
):
def write_to_single_sheet(doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False):
model_default_name = ob.model_json_schema()["title"]
if title is None:
title = model_default_name
wb = open_or_create_workbook(doc_filepath)
ws = create_sheet(wb, "metadata", sheet_number=0)
version = f"{metadata_type} type metadata version {__version__}"
current_row = write_title_and_version_info(ws, title, version, protect_title=False)
current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose)
correct_column_widths(worksheet=ws)
Expand All @@ -525,11 +519,10 @@ def write_to_single_sheet(


def write_across_many_sheets(
doc_filepath: str, ob: BaseModel, metadata_type: str, title: Optional[str] = None, verbose=False
doc_filepath: str, ob: BaseModel, version: str, title: Optional[str] = None, verbose=False
):
wb = open_or_create_workbook(doc_filepath)
ws = create_sheet(wb, "metadata", sheet_number=0)
version = f"{metadata_type} type metadata version {__version__}"
current_row = write_title_and_version_info(ws, title, version, protect_title=False)

children = seperate_simple_from_pydantic(ob)
Expand Down
6 changes: 1 addition & 5 deletions pydantic_schemas/utils/schema_base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,5 @@

class SchemaBaseModel(BaseModel):
model_config = ConfigDict(
validate_assignment=True, protected_namespaces=(), use_enum_values=True, extra="forbid"
validate_assignment=True, protected_namespaces=(), use_enum_values=True, extra="ignore"
) # if a subclass has a model_config then this will be overridden

def __setitem__(self, key, value):
"""Allow dict like setting: Model[key] = value"""
setattr(self, key, value)
2 changes: 1 addition & 1 deletion pydantic_schemas/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,6 @@ def subset_pydantic_model(model: BaseModel, feature_names: List[str], name: Opti
input_dict = {k: v for k, v in model.model_dump(mode="json").items() if k in feature_names}
input_dict_standardized = standardize_keys_in_dict(input_dict)
try:
return SubModel(**input_dict_standardized)
return SubModel.model_validate(input_dict_standardized)
except:
raise ValueError(input_dict_standardized)
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pandas = "^2.2.2"
numpy = "^2.1.0"
pydantic = "^2.8.0"
openpyxl = "^3.1.5"
certifi = "^2024.8.30"

[tool.poetry.group.dev.dependencies]
pytest = "^8.2.2"
Expand Down

0 comments on commit 5c508a6

Please sign in to comment.