Extract datamodel to json and excel #18

Merged: 20 commits, Jan 10, 2025
5 changes: 4 additions & 1 deletion .gitignore
@@ -171,4 +171,7 @@ cython_debug/

# pytest coverage
pytest.xml
pytest-coverage.txt
pytest-coverage.txt

# artifacts
artifacts
5 changes: 4 additions & 1 deletion .vscode/settings.json
@@ -57,7 +57,10 @@
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/.venv/bin/bam_masterdata",
"justMyCode": false,
"args": ["fill_masterdata"]
"args": [
"fill_masterdata",
// "--url=https://devel.datastore.bam.de/"
]
},
{
"name": "BM export-to-json",
109 changes: 99 additions & 10 deletions bam_masterdata/cli/cli.py
@@ -1,10 +1,23 @@
import os
import subprocess
import time
from pathlib import Path

import click
from decouple import config as environ
from openpyxl import Workbook

from bam_masterdata.cli.entities_to_excel import entities_to_excel
from bam_masterdata.cli.entities_to_json import entities_to_json
from bam_masterdata.cli.fill_masterdata import MasterdataCodeGenerator
from bam_masterdata.logger import logger
from bam_masterdata.utils import (
delete_and_create_dir,
import_module,
listdir_py_modules,
)

DATAMODEL_DIR = os.path.join(".", "bam_masterdata", "datamodel")


@click.group(help="Entry point to run `bam_masterdata` CLI commands.")
@@ -14,19 +27,31 @@ def cli():

@cli.command(
name="fill_masterdata",
help="Fill the masterdata from the openBIS instance specified in the `.env` in the bam_masterdata/datamodel/ subfolder.",
help="Fill the masterdata from the openBIS instance and stores it in the bam_masterdata/datamodel/ modules.",
)
@click.option(
"--url",
type=str,
required=False,
help="""
(Optional) The URL of the openBIS instance from which to extract the data model. If not defined,
the value of the `OPENBIS_URL` environment variable is used.
""",
)
def fill_masterdata():
def fill_masterdata(url):
start_time = time.time()

# ! this takes a lot of time loading all the entities in Openbis
generator = MasterdataCodeGenerator()
# Use the URL if provided, otherwise fall back to defaults
if not url:
url = environ("OPENBIS_URL")
click.echo(f"Using the openBIS instance: {url}\n")
generator = MasterdataCodeGenerator(url=url)

# Add each module to the `bam_masterdata/datamodel` directory
output_dir = os.path.join(".", "bam_masterdata", "datamodel")
for module_name in ["property", "collection", "dataset", "object", "vocabulary"]:
module_start_time = time.perf_counter() # more precise time measurement
output_file = Path(os.path.join(output_dir, f"{module_name}_types.py"))
output_file = Path(os.path.join(DATAMODEL_DIR, f"{module_name}_types.py"))

# Get the method from `MasterdataCodeGenerator`
code = getattr(generator, f"generate_{module_name}_types")()
@@ -40,12 +65,76 @@ def fill_masterdata():
elapsed_time = time.time() - start_time
click.echo(f"Generated all types in {elapsed_time:.2f} seconds\n\n")

# ! this could be automated in the CLI
click.echo(
"Don't forget to apply ruff at the end after generating the files by doing:\n"
)
click.echo(" ruff check .\n")
click.echo(" ruff format .\n")
try:
# Run ruff check
click.echo("Running `ruff check .`...")
subprocess.run(["ruff", "check", "."], check=True)

# Run ruff format
click.echo("Running `ruff format .`...")
subprocess.run(["ruff", "format", "."], check=True)
except subprocess.CalledProcessError as e:
click.echo(f"Error during ruff execution: {e}", err=True)
else:
click.echo("Ruff checks and formatting completed successfully!")


@cli.command(
name="export_to_json",
help="Export entities to JSON files to the `./artifacts/` folder.",
)
def export_to_json():
# Get the directories from the Python modules and the export directory for the static artifacts
export_dir = os.path.join(".", "artifacts")

# Delete and create the export directory
delete_and_create_dir(directory_path=export_dir, logger=logger)

# Get the Python modules to process the datamodel
py_modules = listdir_py_modules(directory_path=DATAMODEL_DIR, logger=logger)

# Process each module using the `to_json` method of each entity
for module_path in py_modules:
entities_to_json(module_path=module_path, export_dir=export_dir, logger=logger)

click.echo(f"All entity artifacts have been generated and saved to {export_dir}")


@cli.command(
name="export_to_excel",
help="Export entities to an Excel file in the path `./artifacts/masterdata.xlsx`.",
)
def export_to_excel():
# Get the Python modules to process the datamodel
py_modules = listdir_py_modules(directory_path=DATAMODEL_DIR, logger=logger)

# Load the definitions module classes
definitions_module = import_module(
module_path="./bam_masterdata/metadata/definitions.py"
)
click.echo(" ruff check .\n")
click.echo(" ruff format .\n")

# Process the modules and save the entities to the openBIS masterdata Excel file
masterdata_file = os.path.join(".", "artifacts", "masterdata.xlsx")
wb = Workbook()
for i, module_path in enumerate(py_modules):
if i == 0:
ws = wb.active
else:
ws = wb.create_sheet()
ws.title = (
os.path.basename(module_path)
.capitalize()
.replace(".py", "")
.replace("_", " ")
)
entities_to_excel(
worksheet=ws,
module_path=module_path,
definitions_module=definitions_module,
)
wb.save(masterdata_file)

click.echo(f"All masterdata have been generated and saved to {masterdata_file}")


if __name__ == "__main__":
99 changes: 99 additions & 0 deletions bam_masterdata/cli/entities_to_excel.py
@@ -0,0 +1,99 @@
import inspect
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
from openpyxl.worksheet.worksheet import Worksheet

from bam_masterdata.utils import import_module


def entities_to_excel(
worksheet: "Worksheet",
module_path: str,
definitions_module: Any,
) -> None:
"""
Export entities to the Excel file. The Python modules are imported using the function `import_module`,
and their contents are inspected (using `inspect`) to find the classes in the datamodel containing
`defs` and with a `to_json` method defined. Each row is then appended to the `worksheet`.

Args:
worksheet (Worksheet): The worksheet to append the entities.
module_path (str): Path to the Python module file.
definitions_module (Any): The module containing the definitions of the entities. This is used
to match the header definitions of the entities.
"""
def_members = inspect.getmembers(definitions_module, inspect.isclass)
module = import_module(module_path=module_path)

# Special case of `PropertyTypeDef` in `property_types.py`
if "property_types.py" in module_path:
for name, obj in inspect.getmembers(module):
if name.startswith("_") or name == "PropertyTypeDef":
continue

# Entity title
worksheet.append([obj.excel_name])

# Entity header definitions and values
worksheet.append(obj.excel_headers)
row = []
for f_set in obj.model_fields.keys():
if f_set == "data_type":
val = obj.data_type.value
else:
val = getattr(obj, f_set)
row.append(val)
worksheet.append(row)
worksheet.append([""]) # empty row after entity definitions
return None

# All other datamodel modules
for _, obj in inspect.getmembers(module, inspect.isclass):
# Ensure the class has the `to_json` method
if not hasattr(obj, "defs") or not callable(getattr(obj, "to_json")):
continue

obj_instance = obj()

# Entity title
obj_definitions = obj_instance.defs
worksheet.append([obj_definitions.excel_name])

# Entity header definitions and values
for def_name, def_cls in def_members:
if def_name == obj_definitions.name:
break
worksheet.append(obj_definitions.excel_headers)
header_values = [
getattr(obj_definitions, f_set) for f_set in def_cls.model_fields.keys()
]
worksheet.append(header_values)

# Properties assignment for ObjectType, DatasetType, and CollectionType
if obj_instance.cls_name in ["ObjectType", "DatasetType", "CollectionType"]:
if not obj_instance.properties:
continue
worksheet.append(obj_instance.properties[0].excel_headers)
for prop in obj_instance.properties:
row = []
for f_set in prop.model_fields.keys():
if f_set == "data_type":
val = prop.data_type.value
else:
val = getattr(prop, f_set)
row.append(val)
worksheet.append(row)
# Terms assignment for VocabularyType
elif obj_instance.cls_name == "VocabularyType":
if not obj_instance.terms:
continue
worksheet.append(obj_instance.terms[0].excel_headers)
for term in obj_instance.terms:
worksheet.append(
getattr(term, f_set) for f_set in term.model_fields.keys()
)

# ? does PropertyTypeDef need to be exported to Excel?

worksheet.append([""]) # empty row after entity definitions
67 changes: 67 additions & 0 deletions bam_masterdata/cli/entities_to_json.py
@@ -0,0 +1,67 @@
import inspect
import json
import os
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from structlog._config import BoundLoggerLazyProxy

import click

from bam_masterdata.utils import delete_and_create_dir, import_module


def entities_to_json(
module_path: str, export_dir: str, logger: "BoundLoggerLazyProxy"
) -> None:
"""
Export entities to JSON files. The Python modules are imported using the function `import_module`,
and their contents are inspected (using `inspect`) to find the classes in the datamodel containing
`defs` and with a `to_json` method defined.

Args:
module_path (str): Path to the Python module file.
export_dir (str): Path to the directory where the JSON files will be saved.
logger (BoundLoggerLazyProxy): The logger to log messages.
"""
module = import_module(module_path=module_path)
# export to specific subfolders for each type of entity (each module)
module_export_dir = os.path.join(
export_dir, os.path.basename(module_path).replace(".py", "")
)
delete_and_create_dir(directory_path=module_export_dir, logger=logger)

# Special case of `PropertyTypeDef` in `property_types.py`
if "property_types.py" in module_path:
for name, obj in inspect.getmembers(module):
if name.startswith("_") or name == "PropertyTypeDef":
continue
try:
json_data = json.dumps(obj.model_dump(), indent=2)
output_file = os.path.join(module_export_dir, f"{obj.code}.json")
with open(output_file, "w", encoding="utf-8") as f:
f.write(json_data)

click.echo(f"Saved JSON for class {name} to {output_file}")
except Exception as err:
click.echo(f"Failed to process class {name} in {module_path}: {err}")
return None

# All other datamodel modules
for name, obj in inspect.getmembers(module, inspect.isclass):
# Ensure the class has the `to_json` method
if not hasattr(obj, "defs") or not callable(getattr(obj, "to_json")):
continue

try:
# Instantiate the class and call the method
json_data = obj().to_json(indent=2)

# Write JSON data to file
output_file = os.path.join(module_export_dir, f"{obj.defs.code}.json")
with open(output_file, "w", encoding="utf-8") as f:
f.write(json_data)

click.echo(f"Saved JSON for class {name} to {output_file}")
except Exception as err:
click.echo(f"Failed to process class {name} in {module_path}: {err}")
21 changes: 11 additions & 10 deletions bam_masterdata/cli/fill_masterdata.py
@@ -3,6 +3,7 @@
import click

from bam_masterdata.openbis import OpenbisEntities
from bam_masterdata.openbis.login import environ


class MasterdataCodeGenerator:
@@ -11,14 +12,14 @@ class MasterdataCodeGenerator:
openBIS instance.
"""

def __init__(self):
def __init__(self, url: str = ""):
start_time = time.time()
# * This part takes some time due to the loading of all entities from Openbis
self.properties = OpenbisEntities().get_property_dict()
self.collections = OpenbisEntities().get_collection_dict()
self.datasets = OpenbisEntities().get_dataset_dict()
self.objects = OpenbisEntities().get_object_dict()
self.vocabularies = OpenbisEntities().get_vocabulary_dict()
self.properties = OpenbisEntities(url=url).get_property_dict()
self.collections = OpenbisEntities(url=url).get_collection_dict()
self.datasets = OpenbisEntities(url=url).get_dataset_dict()
self.objects = OpenbisEntities(url=url).get_object_dict()
self.vocabularies = OpenbisEntities(url=url).get_vocabulary_dict()
elapsed_time = time.time() - start_time
click.echo(
f"Loaded OpenBIS entities in `MasterdataCodeGenerator` initialization {elapsed_time:.2f} seconds\n"
@@ -103,7 +104,7 @@ def add_properties(
# ! patching dataType=SAMPLE instead of OBJECT
if prop_data.get("dataType", "") == "SAMPLE":
prop_data["dataType"] = "OBJECT"
lines.append(f" data_type=\"{prop_data.get('dataType', '')}\",")
lines.append(f' data_type="{prop_data.get("dataType", "")}",')
property_label = (prop_data.get("label") or "").replace("\n", "\\n")
lines.append(f' property_label="{property_label}",')
description = (
@@ -163,7 +164,7 @@ def generate_property_types(self) -> str:
# ! patching dataType=SAMPLE instead of OBJECT
if data.get("dataType", "") == "SAMPLE":
data["dataType"] = "OBJECT"
lines.append(f" data_type=\"{data.get('dataType', '')}\",")
lines.append(f' data_type="{data.get("dataType", "")}",')
property_label = (
(data.get("label") or "").replace('"', '\\"').replace("\n", "\\n")
)
@@ -222,7 +223,7 @@ def generate_collection_types(self) -> str:
lines.append(f' description="""{description}""",')
if data.get("validationPlugin") != "":
lines.append(
f" validation_script=\"{data.get('validationPlugin')}\","
f' validation_script="{data.get("validationPlugin")}",'
)
lines.append(" )")
lines.append("")
@@ -327,7 +328,7 @@ def generate_object_types(self) -> str:
)
lines.append(f' description="""{description}""",')
lines.append(
f" generated_code_prefix=\"{data.get('generatedCodePrefix', '')}\","
f' generated_code_prefix="{data.get("generatedCodePrefix", "")}",'
)
lines.append(" )")
lines.append("")