Skip to content

Commit

Permalink
Extract to excel (#19)
Browse files Browse the repository at this point in the history
* Fix utils testing

* Sort files in utils (and in the tests) to avoid order-dependent test behavior across operating systems

* Added CLI to export to excel

* Fix data type printing to excel column

* Added examples for collection type and property type

Fix entities-to-json for the special case of PropertyTypeDef

* Fixed descriptions

* Add skip test
  • Loading branch information
JosePizarro3 authored Dec 17, 2024
1 parent 1cadc6d commit d1c65b4
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 11 deletions.
50 changes: 48 additions & 2 deletions bam_data_store/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import os

import click
from openpyxl import Workbook

from bam_data_store.cli.entities_to_excel import entities_to_excel
from bam_data_store.cli.entities_to_json import entities_to_json
from bam_data_store.logger import logger
from bam_data_store.utils import delete_and_create_dir, listdir_py_modules
from bam_data_store.utils import (
delete_and_create_dir,
import_module,
listdir_py_modules,
)


@click.group(help='Entry point to run `bam_data_store` CLI commands.')
Expand All @@ -14,6 +20,7 @@ def cli():

@cli.command(help='Export entities to JSON files to the `./artifacts/` folder.')
def export_entities_to_json():
# Get the directories from the Python modules and the export directory for the static artifacts
datamodel_dir = os.path.join('.', 'bam_data_store', 'datamodel')
export_dir = os.path.join('.', 'artifacts')

Expand All @@ -23,12 +30,51 @@ def export_entities_to_json():
# Get the Python modules to process the datamodel
py_modules = listdir_py_modules(directory_path=datamodel_dir, logger=logger)

# Process each module
# Process each module using the `to_json` method of each entity
for module_path in py_modules:
entities_to_json(module_path=module_path, export_dir=export_dir, logger=logger)

click.echo(f'All entity artifacts have been generated and saved to {export_dir}')


@cli.command(
    help="""
    Export entities to an Excel file in the path `./artifacts/masterdata.xlsx`.
    """,
)
def export_entities_to_excel():
    """Collect all datamodel modules and write their entities into one openBIS masterdata workbook."""
    # Gather the datamodel Python modules that will be exported
    datamodel_dir = os.path.join('.', 'bam_data_store', 'datamodel')
    py_modules = listdir_py_modules(directory_path=datamodel_dir, logger=logger)

    # Import the module holding the entity definition classes; it is used to
    # match each entity's header definitions when writing the sheets
    definitions_module = import_module(
        module_path='./bam_data_store/metadata/definitions.py'
    )

    # One worksheet per datamodel module, all saved into a single workbook
    masterdata_file = os.path.join('.', 'artifacts', 'masterdata.xlsx')
    workbook = Workbook()
    for index, module_path in enumerate(py_modules):
        # Reuse the default sheet for the first module; add new sheets afterwards
        sheet = workbook.active if index == 0 else workbook.create_sheet()
        # Sheet title derived from the file name, e.g. `object_types.py` -> `Object types`
        sheet.title = (
            os.path.basename(module_path)
            .capitalize()
            .replace('.py', '')
            .replace('_', ' ')
        )
        entities_to_excel(
            worksheet=sheet,
            module_path=module_path,
            definitions_module=definitions_module,
        )
    workbook.save(masterdata_file)

    click.echo(f'All masterdata have been generated and saved to {masterdata_file}')


if __name__ == '__main__':
cli()
76 changes: 76 additions & 0 deletions bam_data_store/cli/entities_to_excel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import inspect
import os
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from openpyxl.worksheet.worksheet import Worksheet

from bam_data_store.utils import import_module


def entities_to_excel(
    worksheet: 'Worksheet',
    module_path: str,
    definitions_module: Any,
) -> None:
    """
    Export entities to the Excel file. The Python module is imported using the function
    `import_module`, and its contents are inspected (using `inspect`) to find the classes
    in the datamodel containing `defs` and with a `to_json` method defined. Each entity is
    appended to the `worksheet` as: a title row, a header/values pair matched against
    `definitions_module`, and (for object/vocabulary types) one row per property/term.

    Args:
        worksheet (Worksheet): The worksheet to append the entities.
        module_path (str): Path to the Python module file.
        definitions_module (Any): The module containing the definitions of the entities. This is used
            to match the header definitions of the entities.
    """
    def_members = inspect.getmembers(definitions_module, inspect.isclass)
    module = import_module(module_path=module_path)
    for _, obj in inspect.getmembers(module, inspect.isclass):
        # Only process datamodel classes defining `defs` and a callable `to_json`.
        # `getattr` with a `None` default avoids an AttributeError (and lets us
        # skip instead) when a class has `defs` but no `to_json`.
        if not hasattr(obj, 'defs') or not callable(getattr(obj, 'to_json', None)):
            continue

        obj_instance = obj()

        # Entity title
        obj_definitions = obj_instance.defs
        worksheet.append([obj_definitions.excel_name])

        # Entity header definitions and values. Previously a bare `for ... break`
        # silently fell through to the LAST definitions class when no name matched,
        # writing wrong header rows; now unmatched entities are skipped explicitly.
        def_cls = next(
            (cls for def_name, cls in def_members if def_name == obj_definitions.name),
            None,
        )
        if def_cls is None:
            continue
        worksheet.append(obj_definitions.excel_headers)
        header_values = [
            getattr(obj_definitions, f_set) for f_set in def_cls.model_fields.keys()
        ]
        worksheet.append(header_values)

        # Properties assignment for ObjectType
        if obj_instance.entity_type == 'ObjectType':
            if not obj_instance.properties:
                continue
            worksheet.append(obj_instance.properties[0].excel_headers)
            for prop in obj_instance.properties:
                # `data_type` is an enum; export its raw value instead of the enum repr
                row = [
                    prop.data_type.value if f_set == 'data_type' else getattr(prop, f_set)
                    for f_set in prop.model_fields.keys()
                ]
                worksheet.append(row)
        # Terms assignment for VocabularyType
        elif obj_instance.entity_type == 'VocabularyType':
            if not obj_instance.terms:
                continue
            worksheet.append(obj_instance.terms[0].excel_headers)
            for term in obj_instance.terms:
                worksheet.append(
                    [getattr(term, f_set) for f_set in term.model_fields.keys()]
                )

        # ? do the PropertyTypeDef need to be exported to Excel?

        worksheet.append([''])  # empty row after entity definitions
16 changes: 16 additions & 0 deletions bam_data_store/cli/entities_to_json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import inspect
import json
import os
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -46,3 +47,18 @@ def entities_to_json(
click.echo(f'Saved JSON for class {name} to {output_file}')
except Exception as err:
click.echo(f'Failed to process class {name} in {module_path}: {err}')

# Special case of `PropertyTypeDef` in `property_types.py`
if 'property_types.py' in module_path:
for name, obj in inspect.getmembers(module):
if name.startswith('_') or name == 'PropertyTypeDef':
continue
try:
json_data = json.dumps(obj.model_dump(), indent=2)
output_file = os.path.join(module_export_dir, f'{obj.code}.json')
with open(output_file, 'w', encoding='utf-8') as f:
f.write(json_data)

click.echo(f'Saved JSON for class {name} to {output_file}')
except Exception as err:
click.echo(f'Failed to process class {name} in {module_path}: {err}')
54 changes: 54 additions & 0 deletions bam_data_store/datamodel/collection_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from bam_data_store.metadata.definitions import (
CollectionTypeDef,
PropertyTypeAssignment,
)
from bam_data_store.metadata.entities import CollectionType


class DefaultExperiment(CollectionType):
    """
    Collection type for a default experiment. Declares the openBIS collection-type
    definition (`defs`) plus the property assignments exported to the masterdata
    artifacts (JSON/Excel) by the CLI.
    """

    # openBIS collection type definition; the description appears to be an
    # English//German label pair — TODO confirm the `//` convention
    defs = CollectionTypeDef(
        version=1,
        code='DEFAULT_EXPERIMENT',
        description="""
        Default Experiment//Standard-Experiment
        """,
    )

    # Mandatory display name of the experiment ($NAME is an openBIS built-in code)
    name = PropertyTypeAssignment(
        version=1,
        code='$NAME',
        data_type='VARCHAR',
        property_label='Name',
        description="""
        Name
        """,
        mandatory=True,
        show_in_edit_views=True,
        section='General information',
    )

    # Optional grant identifier associated with the experiment
    grant = PropertyTypeAssignment(
        version=1,
        code='DEFAULT_EXPERIMENT.GRANT',
        data_type='VARCHAR',
        property_label='Grant',
        description="""
        Grant
        """,
        mandatory=False,
        show_in_edit_views=True,
        section='General information',
    )

    # Free-text goals of the experiment (multiline field)
    experimental_goals = PropertyTypeAssignment(
        version=1,
        code='DEFAULT_EXPERIMENT.EXPERIMENTAL_GOALS',
        data_type='MULTILINE_VARCHAR',
        property_label='Goals',
        description="""
        Goals of the experiment
        """,
        mandatory=False,
        show_in_edit_views=True,
        section='Experimental details',
    )
11 changes: 11 additions & 0 deletions bam_data_store/datamodel/property_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from bam_data_store.metadata.definitions import PropertyTypeDef

Name = PropertyTypeDef(
version=1,
code='$NAME',
description="""
Name
""",
property_label='Name',
data_type='VARCHAR',
)
25 changes: 25 additions & 0 deletions bam_data_store/metadata/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,31 @@ def validate_code(cls, value: str) -> str:
def strip_description(cls, value: str) -> str:
return value.strip()

@property
def name(self) -> str:
    """Class name of this definition, e.g. `ObjectTypeDef`."""
    return type(self).__name__

@property
def excel_name(self) -> str:
    """
    Name of the entity in the format expected by the openBIS Excel file.

    NOTE(review): returns `None` for a definition class not listed in the
    mapping — callers should handle that case.
    """
    translation = {
        'CollectionTypeDef': 'EXPERIMENT_TYPE',
        'DataSetTypeDef': 'DATASET_TYPE',
        'ObjectTypeDef': 'SAMPLE_TYPE',
        'PropertyTypeDef': 'PROPERTY_TYPE',
        'VocabularyTypeDef': 'VOCABULARY_TYPE',
    }
    return translation.get(self.name)

@property
def excel_headers(self) -> list[str]:
    """
    Headers for the entity in the format expected by the openBIS Excel file:
    each pydantic field name capitalized with underscores turned into spaces.
    """
    headers = []
    for field_name in self.model_fields.keys():
        headers.append(field_name.capitalize().replace('_', ' '))
    return headers


class BaseObjectTypeDef(EntityDef):
"""
Expand Down
21 changes: 16 additions & 5 deletions bam_data_store/metadata/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pydantic import BaseModel, ConfigDict, Field, model_validator

from bam_data_store.metadata.definitions import (
CollectionTypeDef,
ObjectTypeDef,
PropertyTypeAssignment,
VocabularyTerm,
Expand Down Expand Up @@ -89,6 +90,13 @@ def model_validator_after_init(cls, data: Any) -> Any:

return data

@property
def entity_type(self) -> str:
    """String tag identifying this entity class; lets callers branch without `isinstance` checks."""
    return 'ObjectType'


class VocabularyType(BaseEntity):
"""
Expand Down Expand Up @@ -128,10 +136,13 @@ def model_validator_after_init(cls, data: Any) -> Any:

return data


class PropertyType(BaseEntity):
pass
@property
def entity_type(self) -> str:
    """String tag identifying this entity class; lets callers branch without `isinstance` checks."""
    return 'VocabularyType'


class CollectionType(BaseEntity):
pass
class CollectionType(ObjectType):
model_config = ConfigDict(ignored_types=(CollectionTypeDef, PropertyTypeAssignment))
4 changes: 3 additions & 1 deletion bam_data_store/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import importlib.util
import os
import shutil
import sys
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
Expand Down Expand Up @@ -59,7 +60,8 @@ def listdir_py_modules(
return []

# Filter out files that start with '_'
return [f for f in files if not os.path.basename(f).startswith('_')]
    # ! sorted to avoid OS-dependent directory-listing order
return sorted([f for f in files if not os.path.basename(f).startswith('_')])


def import_module(module_path: str) -> Any:
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ maintainers = [
license = { file = "LICENSE" }
dependencies = [
"pybis<=1.36.3",
"pandas",
"openpyxl",
"click",
"pydantic",
Expand Down
9 changes: 7 additions & 2 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,13 @@ def test_listdir_py_modules(
if not listdir:
assert log_storage[0]['event'] == log_message
assert log_storage[0]['level'] == log_message_level
assert result == listdir
# when testing locally and with Github actions the order of the files is different --> `result` is sorted, so we also sort `listdir`
assert result == sorted(listdir)


@pytest.mark.skip(
reason='Very annoying to test this function, as any module we can use to be tested will change a lot in the future.'
)
def test_import_module():
"""Tests the `import_module` function."""
    # testing only the positive results
Expand All @@ -85,9 +89,10 @@ def test_import_module():
'importlib',
'os',
'shutil',
'sys',
]
assert [f[0] for f in inspect.getmembers(module, inspect.isclass)] == []
assert [f[0] for f in inspect.getmembers(module, inspect.isclass)] == [
assert [f[0] for f in inspect.getmembers(module, inspect.isfunction)] == [
'delete_and_create_dir',
'import_module',
'listdir_py_modules',
Expand Down

0 comments on commit d1c65b4

Please sign in to comment.