Skip to content

Commit

Permalink
Merge pull request #3 from worldbank/u/gblackadder/versioning
Browse files Browse the repository at this point in the history
U/gblackadder/versioning
  • Loading branch information
gblackadder authored Sep 27, 2024
2 parents abdaa02 + ff6b79e commit 617d09c
Show file tree
Hide file tree
Showing 23 changed files with 113 additions and 31 deletions.
30 changes: 26 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,34 @@ survey_metadata.study_desc.title_statement.idno = "project_idno"
```


## Updating Pydantic definitions and Excel sheets
## Updating Schemas

To update the pydantic schemas so that they match the latest json schemas run
First create a branch from the main branch.

Then make the change you want to the json schema in the schemas folder.

Then update the version number in `pyproject.toml`, incrementing the major, minor or patch number as appropriate.

Next, update the Pydantic schemas so that they match the latest JSON schemas by running

`python pydantic_schemas/generators/generate_pydantic_schemas.py`

Then to update the Excel sheets run
Finally update the Excel sheets by running

`python pydantic_schemas/generators/generate_excel_files.py`

## Versioning conventions for schemas

### Major Changes

- field type changes that break convention and cannot be coerced, such as a field changing from a string to an array
- a mandatory field added or optional field changed to mandatory

### Minor Changes

- field removed
- optional field added

### Patch Changes

`python pydantic_schemas/generators/generate_excel_files.py`
- field type changes that can be coerced such as int to string
Binary file modified excel_sheets/Document_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Indicator_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Indicators_db_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Microdata_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Resource_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Script_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Table_metadata.xlsx
Binary file not shown.
Binary file modified excel_sheets/Video_metadata.xlsx
Binary file not shown.
1 change: 0 additions & 1 deletion pydantic_schemas/document_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: document-schema.json
# timestamp: 2024-09-13T19:00:20+00:00

from __future__ import annotations

Expand Down
Empty file.
83 changes: 81 additions & 2 deletions pydantic_schemas/generators/generate_excel_files.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,75 @@
import os

import openpyxl

from pydantic_schemas.metadata_manager import MetadataManager


def _cell_differences(cell1, cell2):
    """Return human-readable descriptions of how two cells differ.

    The comparison covers the cell value plus a subset of its formatting:
    font (name, size, bold, italic), fill (start/end colour index), border
    (left/right/top/bottom style) and alignment (horizontal/vertical).

    Args:
        cell1: Cell from the first workbook.
        cell2: Cell at the same coordinate in the second workbook.

    Returns:
        A list of difference descriptions; empty when the cells match on
        every compared attribute.
    """
    differences = []
    if cell1.value != cell2.value:
        differences.append(f"Value: {cell1.value} != {cell2.value}")

    font1, font2 = cell1.font, cell2.font
    if (
        font1.name != font2.name
        or font1.size != font2.size
        or font1.bold != font2.bold
        or font1.italic != font2.italic
    ):
        differences.append(f"Font: {font1} != {font2}")

    fill1, fill2 = cell1.fill, cell2.fill
    if (
        fill1.start_color.index != fill2.start_color.index
        or fill1.end_color.index != fill2.end_color.index
    ):
        differences.append(f"Fill: {fill1} != {fill2}")

    border1, border2 = cell1.border, cell2.border
    if (
        border1.left.style != border2.left.style
        or border1.right.style != border2.right.style
        or border1.top.style != border2.top.style
        or border1.bottom.style != border2.bottom.style
    ):
        differences.append(f"Border: {border1} != {border2}")

    align1, align2 = cell1.alignment, cell2.alignment
    if align1.horizontal != align2.horizontal or align1.vertical != align2.vertical:
        differences.append(f"Alignment: {align1} != {align2}")

    return differences


def compare_excel_files(file1, file2):
    """Report whether two Excel workbooks have the same content and formatting.

    The workbooks must have identical sheet names in the same order. Every
    cell in the union of both sheets' used ranges is then compared, so rows
    or columns present in only one of the files count as a difference. Cell
    C1 of the 'metadata' sheet is ignored because it only holds the
    versioning number.

    Details of the first mismatch found are printed to stdout.

    Args:
        file1: Path of the first workbook.
        file2: Path of the second workbook.

    Returns:
        True if no difference was found, False otherwise.
    """
    # Load the workbooks
    wb1 = openpyxl.load_workbook(file1)
    wb2 = openpyxl.load_workbook(file2)

    # Both workbooks must contain the same sheets, in the same order
    sheets1 = wb1.sheetnames
    sheets2 = wb2.sheetnames
    if sheets1 != sheets2:
        print("Sheet names do not match")
        print(f"File1 sheets: {sheets1}")
        print(f"File2 sheets: {sheets2}")
        return False

    for sheet_name in sheets1:
        ws1 = wb1[sheet_name]
        ws2 = wb2[sheet_name]

        # Walk the union of both used ranges; iterating only over ws1 would
        # silently ignore rows/columns that exist solely in ws2.
        last_row = max(ws1.max_row, ws2.max_row)
        last_col = max(ws1.max_column, ws2.max_column)
        rows1 = ws1.iter_rows(min_row=1, max_row=last_row, min_col=1, max_col=last_col)
        rows2 = ws2.iter_rows(min_row=1, max_row=last_row, min_col=1, max_col=last_col)

        for row1, row2 in zip(rows1, rows2):
            for cell1, cell2 in zip(row1, row2):
                cell_address = cell1.coordinate
                if sheet_name == "metadata" and cell_address == "C1":
                    continue  # Skip cell C1 in 'metadata' sheet which only contains the versioning number

                differences = _cell_differences(cell1, cell2)
                if differences:
                    print(f"Differences found at {sheet_name} {cell_address}:")
                    for difference in differences:
                        print(f"  - {difference}")
                    return False

    return True


metadata_manager = MetadataManager()

for metadata_name in metadata_manager.metadata_type_names:
Expand All @@ -10,5 +78,16 @@
filename = f"excel_sheets/{metadata_name.capitalize()}_metadata.xlsx"
print(f"Writing {metadata_name} outline to {filename}")
if os.path.exists(filename):
os.remove(filename)
metadata_manager.write_metadata_outline_to_excel(metadata_name_or_class=metadata_name, filename=filename)
filename2 = f"excel_sheets/{metadata_name.capitalize()}_metadata2.xlsx"
metadata_manager.write_metadata_outline_to_excel(metadata_name_or_class=metadata_name, filename=filename2)
are_identical = compare_excel_files(filename, filename2)
if are_identical:
print("they're the same")
os.remove(filename2)
else:
print("updating")
os.remove(filename)
os.rename(filename2, filename)
else:
metadata_manager.write_metadata_outline_to_excel(metadata_name_or_class=metadata_name, filename=filename)
print()
14 changes: 1 addition & 13 deletions pydantic_schemas/generators/generate_pydantic_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,6 @@
OUTPUT_DIR = os.path.join("pydantic_schemas")
PYTHON_VERSION = "3.11"
BASE_CLASS = ".utils.schema_base_model.SchemaBaseModel"
# INPUTS = [
# "document-schema.json",
# "geospatial-schema.json",
# "image-schema.json",
# "microdata-schema.json",
# "resource-schema.json",
# "script-schema.json",
# "table-schema.json",
# "timeseries-db-schema.json",
# "timeseries-schema.json",
# "video-schema.json",
# ]

INPUTS_TO_OUTPUTS = {
"document-schema.json": "document_schema.py",
Expand All @@ -38,7 +26,6 @@
for input_file, output_file in INPUTS_TO_OUTPUTS.items():
print(f"Generating pydantic schema for {input_file}")
input_path = os.path.join(SCHEMA_DIR, input_file)
# output_file = os.path.splitext(input_file)[0] + ".py"
output_path = os.path.join(OUTPUT_DIR, output_file).replace("-", "_")
run(
[
Expand All @@ -54,6 +41,7 @@
"--use-double-quotes",
"--wrap-string-literal",
"--collapse-root-models",
"--disable-timestamp",
"--base-class",
BASE_CLASS,
"--output",
Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/geospatial_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: geospatial-schema.json
# timestamp: 2024-09-13T19:00:22+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/image_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: image-schema.json
# timestamp: 2024-09-13T19:00:23+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/indicator_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: timeseries-schema.json
# timestamp: 2024-09-13T19:00:32+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/indicators_db_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: timeseries-db-schema.json
# timestamp: 2024-09-13T19:00:30+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/microdata_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: microdata-schema.json
# timestamp: 2024-09-13T19:00:25+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/resource_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: resource-schema.json
# timestamp: 2024-09-13T19:00:26+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/script_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: script-schema.json
# timestamp: 2024-09-13T19:00:27+00:00

from __future__ import annotations

Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/table_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: table-schema.json
# timestamp: 2024-09-13T19:00:29+00:00

from __future__ import annotations

Expand Down
7 changes: 5 additions & 2 deletions pydantic_schemas/utils/pydantic_to_excel.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import copy
import importlib.metadata
import json
import os
from enum import Enum
from typing import List, Optional, Tuple, Union

__version__ = importlib.metadata.version("metadataschemas")

import pandas as pd
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side
Expand Down Expand Up @@ -423,7 +426,7 @@ def write_to_single_sheet(
title = model_default_name
wb = open_or_create_workbook(doc_filepath)
ws = create_sheet(wb, "metadata", sheet_number=0)
version = f"{metadata_type} type metadata version 20240812.1"
version = f"{metadata_type} type metadata version {__version__}"
current_row = write_title_and_version_info(ws, title, version, protect_title=False)
current_row = write_pydantic_to_sheet(ws, ob, current_row, debug=verbose)
correct_column_widths(worksheet=ws)
Expand All @@ -437,7 +440,7 @@ def write_across_many_sheets(
):
wb = open_or_create_workbook(doc_filepath)
ws = create_sheet(wb, "metadata", sheet_number=0)
version = f"{metadata_type} type metadata version 20240905.1"
version = f"{metadata_type} type metadata version {__version__}"
current_row = write_title_and_version_info(ws, title, version, protect_title=False)

children = seperate_simple_from_pydantic(ob)
Expand Down
1 change: 0 additions & 1 deletion pydantic_schemas/video_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# generated by datamodel-codegen:
# filename: video-schema.json
# timestamp: 2024-09-13T19:00:33+00:00

from __future__ import annotations

Expand Down

0 comments on commit 617d09c

Please sign in to comment.