Skip to content

Commit

Permalink
U/gblackadder/expose pydantic testing utils (#7)
Browse files Browse the repository at this point in the history
* expose pydantic testing utils

* fix issues with templates

* include backup options for decoding json

* fix bugs in test utils and include pretty print method

* minor change
  • Loading branch information
gblackadder authored Nov 27, 2024
1 parent 59791f4 commit 07d1802
Show file tree
Hide file tree
Showing 9 changed files with 473 additions and 170 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ To create a timeseries metadata object run
from metadataschemas import indicator_schema

indicator_metadata = indicator_schema.TimeseriesSchema(idno='project_idno',series_description=indicator_schema.SeriesDescription(idno='project_idno', name='project_name'))

indicator_metadata.pretty_print()
```
And the print statement will show you the metadata object in a pleasant format.
```python
TimeseriesSchema(
idno='project_idno',
series_description=series_description(
idno='project_idno',
name='project_name'
)
)
```

Depending on your IDE, selecting `TimeseriesSchema` could show you what fields the schema contains and their corresponding object definitions.
Expand All @@ -37,7 +49,7 @@ There are metadata objects for each of the following metadata types:
| indicator | `indicator_schema.TimeseriesSchema` |
| indicators_db | `indicators_db_schema.TimeseriesDatabaseSchema` |
| microdata | `microdata_schema.MicrodataSchema` |
| resource |`resource_schema.Model` |
| resource | `resource_schema.Model` |
| script | `script_schema.ResearchProjectSchemaDraft` |
| table | `table_schema.Model` |
| video | `video_schema.Model` |
Expand All @@ -61,7 +73,7 @@ mm = MetadataManager()

filename = mm.write_metadata_outline_to_excel('indicator')

filename = mm.save_metadata_to_excel('indicator', object=indicator_metadata)
filename = mm.save_metadata_to_excel(indicator_metadata)

# Then after you have updated the metadata in the Excel file

Expand All @@ -77,7 +89,7 @@ mm.metadata_type_names
microdata_type = mm.metadata_class_from_name("microdata")

# create an instantiated pydantic object and then fill in your data
microdata_metadata = mm.type_to_outline(metadata_type="microdata")
microdata_metadata = mm.create_metadata_outline("microdata")
microdata_metadata.repositoryid = "repository id"
microdata_metadata.study_desc.title_statement.idno = "project_idno"
```
Expand Down
55 changes: 54 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

144 changes: 132 additions & 12 deletions pydantic_schemas/metadata_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class MetadataManager:
"video": video_schema.Model,
}

_SCHEMA_TO_TYPE = {v: k for k, v in _TYPE_TO_SCHEMA.items()}

_TYPE_TO_WRITER = {
"document": write_across_many_sheets,
"geospatial": write_across_many_sheets,
Expand Down Expand Up @@ -73,6 +75,24 @@ class MetadataManager:
}

def metadata_class_from_name(self, metadata_name: str) -> Type[BaseModel]:
"""
Retrieve the pydantic model class for the given metadata type.
Args:
metadata_name (str): The name of the metadata type. Must be one of:
document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video
Returns:
Type[BaseModel]: The pydantic model class for the metadata type.
Raises:
ValueError: If the metadata name is not supported.
Example:
>>> from pydantic_schemas.metadata_manager import MetadataManager
>>> manager = MetadataManager()
>>> document_class = manager.metadata_class_from_name("document")
"""
metadata_name = self.standardize_metadata_name(metadata_name)
schema = self._TYPE_TO_SCHEMA[metadata_name]
return copy(schema)
Expand All @@ -82,6 +102,26 @@ def metadata_type_names(self) -> List[str]:
return list(self._TYPE_TO_SCHEMA.keys())

def standardize_metadata_name(self, metadata_name: str) -> str:
"""
Standardize the metadata name to a consistent format. In particular, it converts the name to lowercase and
replaces spaces and hyphens with underscores. It also maps certain metadata names to their standard
counterparts. For example, "survey" and "survey_microdata" are both mapped to "microdata". "timeseries" is
mapped to "indicator" and "timeseries_db" is mapped to "indicators_db".
Args:
metadata_name (str): The name of the metadata type.
Returns:
str: The standardized metadata name.
Raises:
ValueError: If the metadata name is not supported.
Example:
>>> from pydantic_schemas.metadata_manager import MetadataManager
>>> manager = MetadataManager()
>>> standardized_name = manager.standardize_metadata_name("Document")
"""
metadata_name = metadata_name.lower()
metadata_name = metadata_name.replace("-", "_").replace(" ", "_")
if metadata_name == "survey" or metadata_name == "survey_microdata":
Expand All @@ -96,6 +136,21 @@ def standardize_metadata_name(self, metadata_name: str) -> str:
def create_metadata_outline(
self, metadata_name_or_class: Union[str, Type[BaseModel]], debug: bool = False
) -> BaseModel:
"""
Create a skeleton pydantic object for the given metadata type.
Args:
metadata_name_or_class (str or type[BaseModel]): The name of the metadata type or the metadata class.
debug (bool): If True, print debug information on the skeleton creation.
Returns:
BaseModel: A pydantic object with the metadata schema and default values.
Example:
>>> from pydantic_schemas.metadata_manager import MetadataManager
>>> manager = MetadataManager()
>>> document_skeleton = manager.create_metadata_outline("document")
"""
if isinstance(metadata_name_or_class, str):
schema = self.metadata_class_from_name(metadata_name_or_class)
else:
Expand Down Expand Up @@ -123,13 +178,17 @@ def _get_name_version_schema_writer(self, metadata_name_or_class):
mappings. Otherwise, it assumes this is a template and retrieves the title from the class,
and uses a default single page writer function.
"""
if isinstance(metadata_name_or_class, str) or metadata_name_or_class in self._TYPE_TO_SCHEMA.values():
if (
isinstance(metadata_name_or_class, str)
or metadata_name_or_class in self._TYPE_TO_SCHEMA.values()
or type(metadata_name_or_class) in self._TYPE_TO_SCHEMA.values()
):
if isinstance(metadata_name_or_class, str):
metadata_name = self.standardize_metadata_name(metadata_name_or_class)
schema = self._TYPE_TO_SCHEMA[metadata_name]
else:
for metadata_name, schema in self._TYPE_TO_SCHEMA.items():
if schema is metadata_name_or_class:
if schema is metadata_name_or_class or schema is type(metadata_name_or_class):
break
version = f"{metadata_name} type metadata version {__version__}"
writer = self._TYPE_TO_WRITER[metadata_name]
Expand All @@ -145,6 +204,7 @@ def write_metadata_outline_to_excel(
metadata_name_or_class: Union[str, Type[BaseModel]],
filename: Optional[str] = None,
title: Optional[str] = None,
metadata_type: Optional[str] = None,
) -> str:
"""
Create an Excel file formatted for writing the given metadata_name metadata.
Expand All @@ -153,17 +213,32 @@ def write_metadata_outline_to_excel(
metadata_name_or_class (str or type[BaseModel]): the name of a supported metadata type, currently:
document, geospatial, image, indicator, indicators_db, microdata, resource, script, table, video
If passed as a BaseModel type, for instance this is what you would do with a template, then the writer
defaults to a single page.
is determined from the metadata_type. If the metadata_type is not provided, then the
writer defaults to write_to_single_sheet.
filename (Optional[str]): The path to the Excel file. If None, defaults to {metadata_name}_metadata.xlsx
title (Optional[str]): The title for the Excel sheet. If None, defaults to '{metadata_name} Metadata'
metadata_type (Optional[str]): The name of the metadata type, used if the metadata_name_or_class is
an instance of a template. For example 'geospatial', 'document' etc. The name is used to determine
the number of sheets in the Excel file.
Returns:
str: filename of metadata file
Outputs:
An Excel file into which metadata can be entered
"""
metadata_name, version, schema, writer = self._get_name_version_schema_writer(metadata_name_or_class)
# determine the metadata_name_or_class is a class instance or the actual class
if (
metadata_type is not None
and not isinstance(metadata_name_or_class, str)
and metadata_name_or_class not in self._TYPE_TO_SCHEMA.values()
and type(metadata_name_or_class) not in self._TYPE_TO_SCHEMA.values()
):
metadata_type = self.standardize_metadata_name(metadata_type)
_, _, _, writer = self._get_name_version_schema_writer(metadata_type)
metadata_name, version, schema, _ = self._get_name_version_schema_writer(metadata_name_or_class)
else:
metadata_name, version, schema, writer = self._get_name_version_schema_writer(metadata_name_or_class)
skeleton_object = self.create_metadata_outline(schema, debug=False)

if filename is None:
Expand All @@ -181,6 +256,7 @@ def save_metadata_to_excel(
object: BaseModel,
filename: Optional[str] = None,
title: Optional[str] = None,
metadata_type: Optional[str] = None,
verbose: bool = False,
) -> str:
"""
Expand All @@ -190,16 +266,30 @@ def save_metadata_to_excel(
object (BaseModel): The pydantic object to save to the Excel file.
filename (Optional[str]): The path to the Excel file. Defaults to {name}_metadata.xlsx
title (Optional[str]): The title for the Excel sheet. Defaults to '{name} Metadata'
metadata_type (Optional[str]): The name of the metadata type such as 'geospatial', 'document', etc. Used if
the metadata_name_or_class is an instance of a template. The name is used to determine the number of sheets
in the Excel file.
verbose (bool): If True, print debug information on the file creation.
Returns:
str: filename of metadata file
Outputs:
An Excel file containing the metadata from the pydantic object. This file can be updated as needed.
"""
metadata_name, version, schema, writer = self._get_name_version_schema_writer(
type(object)
) # metadata_name_or_class)
if (
metadata_type is not None
# and object not in self._TYPE_TO_SCHEMA.values()
and type(object) not in self._TYPE_TO_SCHEMA.values()
):
metadata_type = self.standardize_metadata_name(metadata_type)
_, _, _, writer = self._get_name_version_schema_writer(metadata_type)
metadata_name, version, schema, _ = self._get_name_version_schema_writer(type(object))
else:
metadata_name, version, schema, writer = self._get_name_version_schema_writer(type(object))
# metadata_name, version, schema, writer = self._get_name_version_schema_writer(
# type(object)
# ) # metadata_name_or_class)
skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False)

if filename is None:
Expand All @@ -212,6 +302,7 @@ def save_metadata_to_excel(
combined_dict = merge_dicts(
skeleton_object.model_dump(),
object.model_dump(exclude_none=False, exclude_unset=True, exclude_defaults=True),
skeleton_mode=True,
)
combined_dict = standardize_keys_in_dict(combined_dict)
new_ob = schema.model_validate(combined_dict)
Expand Down Expand Up @@ -249,7 +340,11 @@ def _get_metadata_name_from_excel_file(filename: str) -> str:
return cell_values[0]

def read_metadata_from_excel(
self, filename: str, metadata_class: Optional[Type[BaseModel]] = None, verbose: bool = False
self,
filename: str,
metadata_class: Optional[Type[BaseModel]] = None,
metadata_type: Optional[str] = None,
verbose: bool = False,
) -> BaseModel:
"""
Read in metadata from an appropriately formatted Excel file as a pydantic object.
Expand All @@ -258,9 +353,22 @@ def read_metadata_from_excel(
Args:
filename (str): The path to the Excel file.
metadata_class (Optional type of BaseModel): A pydantic class type correspondong to the type used to write the Excel file
metadata_type (Optional[str]): The name of the metadata type, such as 'geospatial', 'document', etc. Used if
the metadata_name_or_class is an instance of a template. The name is used to determine the number of
sheets in the Excel file.
verbose (bool): If True, print debug information on the file reading.
Returns:
BaseModel: a pydantic object containing the metadata from the file
Raises:
ValueError: If the metadata type is not supported or if the Excel file is improperly formatted
Example:
>>> from pydantic_schemas.metadata_manager import MetadataManager
>>> manager = MetadataManager()
>>> document_metadata = manager.read_metadata_from_excel("document_metadata.xlsx")
"""
metadata_name = self._get_metadata_name_from_excel_file(filename)
try:
Expand All @@ -273,15 +381,27 @@ def read_metadata_from_excel(
f"'{metadata_name}' not supported. Must be: {list(self._TYPE_TO_SCHEMA.keys())} or try passing in the metadata_class"
) from e
schema = metadata_class
reader = excel_single_sheet_to_pydantic
if metadata_type is not None:
metadata_type = self.standardize_metadata_name(metadata_type)
reader = self._TYPE_TO_READER[metadata_type]
else:
reader = excel_single_sheet_to_pydantic
if verbose:
print("reader is falling back to excel_single_sheet_to_pydantic")
read_object = reader(filename, schema, verbose=verbose)

skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=False)
skeleton_object = self.create_metadata_outline(metadata_name_or_class=schema, debug=verbose)

read_object_dict = read_object.model_dump(
mode="json", exclude_none=False, exclude_unset=True, exclude_defaults=True
)
if verbose:
print("read object dict", read_object_dict)

read_object_dict = read_object.model_dump(exclude_none=False, exclude_unset=True, exclude_defaults=True)
combined_dict = merge_dicts(
skeleton_object.model_dump(),
skeleton_object.model_dump(mode="json"),
read_object_dict,
skeleton_mode=True,
)
combined_dict = standardize_keys_in_dict(combined_dict)
new_ob = schema.model_validate(combined_dict)
Expand Down
Loading

0 comments on commit 07d1802

Please sign in to comment.