From 91f092ef4efa385f470011fe481139e65cadd532 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Fri, 10 Jan 2025 11:24:40 +0100 Subject: [PATCH 01/29] file structure --- cea/datamanagement/format_helper/__init__.py | 0 .../format_helper/cea4_migrate.py | 20 +++++++++++++++++++ .../format_helper/cea4_verify.py | 20 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 cea/datamanagement/format_helper/__init__.py create mode 100644 cea/datamanagement/format_helper/cea4_migrate.py create mode 100644 cea/datamanagement/format_helper/cea4_verify.py diff --git a/cea/datamanagement/format_helper/__init__.py b/cea/datamanagement/format_helper/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py new file mode 100644 index 0000000000..de7f983044 --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -0,0 +1,20 @@ +""" +Mirgate the format of the input data to CEA-4 format after verification. + +""" + +import cea.inputlocator +import os +import cea.config +import time +import geopandas as gpd + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py new file mode 100644 index 0000000000..5e841a1db8 --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -0,0 +1,20 @@ +""" +Verify the format of the input data for CEA-4 model. + +""" + +import cea.inputlocator +import os +import cea.config +import time +import geopandas as gpd + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" From bd61d40d8488e48f8543a367c490037fd74d8136 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Fri, 10 Jan 2025 12:20:22 +0100 Subject: [PATCH 02/29] config and scripts ready --- .../format_helper/format_helper.py | 20 +++++++++++++++++++ cea/default.config | 10 +++++++++- cea/scripts.yml | 9 +++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 cea/datamanagement/format_helper/format_helper.py diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py new file mode 100644 index 0000000000..4a8f8135e4 --- /dev/null +++ b/cea/datamanagement/format_helper/format_helper.py @@ -0,0 +1,20 @@ +""" +Main script of the formate helper that activates the verification and migration as needed. + +""" + +import cea.inputlocator +import os +import cea.config +import time +import geopandas as gpd + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" diff --git a/cea/default.config b/cea/default.config index 7a61de74b8..77494a1181 100644 --- a/cea/default.config +++ b/cea/default.config @@ -610,11 +610,19 @@ variable-5-upper-bound.type = RealParameter variable-5-upper-bound.help = Upper bound of Variable 5. variable-5-upper-bound.nullable = true +[format-helper] +scenarios-to-verify-and-migrate = +scenarios-to-verify-and-migrate.type = ScenarioNameMultiChoiceParameter +scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and migrate). Leave blank to select all Scenarios. + +migrate-from-cea-3 = true +migrate-from-cea-3.type = BooleanParameter +migrate-from-cea-3.help = True if migrate the inputs' format from CEA-3, if detected, to the current version of CEA-4. The process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = scenarios-to-simulate.type = ScenarioNameMultiChoiceParameter -scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all scenarios. Consider excluding the reference-Scenario when import-from-rhino-gh is set to True. +scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all Scenarios. Exclude the reference-Scenario when import-from-rhino-gh is set to True. export-to-rhino-gh = false export-to-rhino-gh.type = BooleanParameter diff --git a/cea/scripts.yml b/cea/scripts.yml index 964d870c0a..c5329093d1 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -357,6 +357,15 @@ Data Management: Utilities: + - name: cea4-format-helper + label: CEA-4 Format Helper + description: "Verifies the inputs are in the correct format for CEA-4. + Migrates late CEA-3 inputs when detected - Note the process of migration is not reversible. + " + interfaces: [cli, dashboard] + module: cea.datamanagement.format_helper.format_helper + parameters: ['general:scenario',format-helper] + - name: sensitivity-analysis-sampler label: Generate Samples for Sensitivity Analysis (SA) description: Generate samples for sensitivity analysis using Sobol Method. From c636d24c3062bdc5a6550bc7ac5f353ed42902dc Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Fri, 10 Jan 2025 12:22:14 +0100 Subject: [PATCH 03/29] some wording changes --- cea/default.config | 2 +- cea/scripts.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cea/default.config b/cea/default.config index 77494a1181..7aed8cb63f 100644 --- a/cea/default.config +++ b/cea/default.config @@ -617,7 +617,7 @@ scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and mig migrate-from-cea-3 = true migrate-from-cea-3.type = BooleanParameter -migrate-from-cea-3.help = True if migrate the inputs' format from CEA-3, if detected, to the current version of CEA-4. The process of migration is not reversible. +migrate-from-cea-3.help = True if migrate the inputs' format from CEA-3 to the current version of CEA-4. The process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = diff --git a/cea/scripts.yml b/cea/scripts.yml index c5329093d1..721b0ec923 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -360,7 +360,7 @@ Utilities: - name: cea4-format-helper label: CEA-4 Format Helper description: "Verifies the inputs are in the correct format for CEA-4. - Migrates late CEA-3 inputs when detected - Note the process of migration is not reversible. + Migrates Late-CEA-3 inputs when detected - Note the process of migration is not reversible. " interfaces: [cli, dashboard] module: cea.datamanagement.format_helper.format_helper From ee2963c27368c9ce073dd93a19cd843452576106 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Fri, 10 Jan 2025 15:40:59 +0100 Subject: [PATCH 04/29] Update cea4_verify.py --- .../format_helper/cea4_verify.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 5e841a1db8..cd9073b6f5 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -18,3 +18,42 @@ __maintainer__ = "Reynold Mok" __email__ = "cea@arch.ethz.ch" __status__ = "Production" + + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. +# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. +def path_to_input_file_without_db(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.csv") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.csv") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.csv") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.csv") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.csv") + elif item == 'networks': + path_to_input_file = os.path.join(scenario, "inputs", "network", "networks.csv") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + + return path_to_input_file + +## -------------------------------------------------------------------------------------------------------------------- +## Unique traits for the CEA-4 format +## -------------------------------------------------------------------------------------------------------------------- + +#1. about zone.shp +ZONE_SHP_COLUMNS = ['Name', 'height_ag', 'floors_ag', 'height_bg', 'floors_bg', ] From 870df5369f499f49386a14a5a97a21ac79ea2a3d Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Fri, 10 Jan 2025 18:29:46 +0100 Subject: [PATCH 05/29] name duplicates --- .../format_helper/cea4_verify.py | 292 +++++++++++++++++- 1 file changed, 288 insertions(+), 4 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index cd9073b6f5..f89ee0755e 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -8,6 +8,7 @@ import cea.config import time import geopandas as gpd +import pandas as pd __author__ = "Zhongming Shi" @@ -42,8 +43,8 @@ def path_to_input_file_without_db(scenario, item): path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.csv") elif item == "supply_systems": path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.csv") - elif item == 'networks': - path_to_input_file = os.path.join(scenario, "inputs", "network", "networks.csv") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.csv") elif item == 'terrain': path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") elif item == 'weather': @@ -51,9 +52,292 @@ def path_to_input_file_without_db(scenario, item): return path_to_input_file + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + +def verify_shp(scenario, item, required_attributes): + """ + Verify if a shapefile contains all required attributes. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Either "zone" or "surroundings". + required_attributes (list): List of attribute names to verify. + + Returns: + A list of missing attributes, or an empty list if all attributes are present. + """ + # Construct the shapefile path + shapefile_path = path_to_input_file_without_db(scenario, item) + + # Check if the shapefile exists + if not os.path.isfile(shapefile_path): + raise FileNotFoundError(f"Shapefile not found: {shapefile_path}") + + # Load the shapefile + try: + gdf = gpd.read_file(shapefile_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + + # Get the column names from the shapefile's attribute table + shapefile_columns = gdf.columns.tolist() + + # Check for missing attributes + missing_attributes = [attr for attr in required_attributes if attr not in shapefile_columns] + + return missing_attributes + + +def verify_csv(scenario, item, required_columns): + """ + Verify if a CSV file contains all required columns. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Identifier for the CSV file. + required_columns (list): List of column names to verify. + + Returns: + A list of missing columns, or an empty list if all columns are present. + """ + # Construct the CSV file path + csv_path = path_to_input_file_without_db(scenario, item) + + # Check if the CSV file exists + if not os.path.isfile(csv_path): + raise FileNotFoundError(f"CSV file not found: {csv_path}") + + # Load the CSV file + try: + df = pd.read_csv(csv_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + + # Get the column names from the CSV file + csv_columns = df.columns.tolist() + + # Check for missing columns + missing_columns = [col for col in required_columns if col not in csv_columns] + + return missing_columns + + +def verify_file_exists(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. + items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files + + +def verify_name_duplicates(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). + + Returns: + list: A list of duplicate names, or an empty list if no duplicates are found. + """ + # Construct the CSV file path + file_path = path_to_input_file_without_db(scenario, item) + + # Check file type and load as a DataFrame + if file_path.endswith('.csv'): + try: + df = pd.read_csv(file_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + elif file_path.endswith('.shp'): + try: + df = gpd.read_file(file_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + else: + raise ValueError("Unsupported file type. Please provide a .csv or .shp file.") + + # Find duplicate names + list_names_duplicated = df['name'][df['name'].duplicated()].tolist() + + return list_names_duplicated + + ## -------------------------------------------------------------------------------------------------------------------- ## Unique traits for the CEA-4 format ## -------------------------------------------------------------------------------------------------------------------- -#1. about zone.shp -ZONE_SHP_COLUMNS = ['Name', 'height_ag', 'floors_ag', 'height_bg', 'floors_bg', ] +def cea4_verify(scenario): + + #0. get the scenario name + scenario_name = os.path.basename(scenario) + + #1. about zone.shp and surroundings.shp + SHAPEFILES = ['zone', 'surroundings'] + COLUMNS_ZONE = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', + 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] + COLUMNS_SURROUNDINGS = ['name', 'height_ag', 'floors_ag'] + + list_missing_attributes_zone = [] + list_missing_attributes_surroundings = [] + + list_missing_files_shp_building_geometries = verify_file_exists(scenario, SHAPEFILES) + if list_missing_files_shp_building_geometries: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometries)) + if 'zone' not in list_missing_files_shp_building_geometries: + list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE) + if list_missing_attributes_zone: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + if 'name' in list_missing_attributes_zone: + list_names_duplicated = verify_name_duplicates(scenario, 'zone') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'surroundings' not in list_missing_files_shp_building_geometries: + list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS) + if list_missing_attributes_surroundings: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + + #2. about .csv files under the "inputs/building-properties" folder + CSV_BUILDING_PROPERTIES = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] + COLUMNS_AIR_CONDITIONING = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] + COLUMNS_ARCHITECTURE = ['name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] + COLUMNS_INDOOR_COMFORT = ['name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] + COLUMNS_INTERNAL_LOADS = ['name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] + COLUMNS_SUPPLY_SYSTEMS = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] + + list_missing_columns_air_conditioning = [] + list_missing_columns_architecture = [] + list_missing_columns_indoor_comfort = [] + list_missing_columns_internal_loads = [] + list_missing_columns_supply_systems = [] + + list_missing_files_csv_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES) + if list_missing_files_csv_building_properties: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + + if 'air_conditioning' not in list_missing_files_csv_building_properties: + list_missing_columns_air_conditioning = verify_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING) + if list_missing_columns_air_conditioning: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + if 'name' in list_missing_columns_air_conditioning: + list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'architecture' not in list_missing_files_csv_building_properties: + list_missing_columns_architecture = verify_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE) + if list_missing_columns_architecture: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + if 'name' in list_missing_columns_architecture: + list_names_duplicated = verify_name_duplicates(scenario, 'architecture') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'indoor_comfort' not in list_missing_files_csv_building_properties: + list_missing_columns_indoor_comfort = verify_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT) + if list_missing_columns_indoor_comfort: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + if 'name' in list_missing_columns_indoor_comfort: + list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'internal_loads' not in list_missing_files_csv_building_properties: + list_missing_columns_internal_loads = verify_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS) + if list_missing_columns_internal_loads: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + if 'name' in list_missing_columns_internal_loads: + list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'supply_systems' not in list_missing_files_csv_building_properties: + list_missing_columns_supply_systems = verify_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS) + if list_missing_columns_supply_systems: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + if 'name' in list_missing_columns_supply_systems: + list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + + + #3. verify if terrain.tif, weather.epw and streets.shp exist + list_missing_files_terrain = verify_file_exists(scenario, ['terrain']) + if list_missing_files_terrain: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + + list_missing_files_weather = verify_file_exists(scenario, ['weather']) + if list_missing_files_weather: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + + list_missing_files_streets = verify_file_exists(scenario, ['streets']) + if list_missing_files_streets: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, when thermal networks analysis is required.') + + #4. verify the DB under the "inputs/technology/" folder + list_missing_files_db = [] + + # Compile the results + dict_missing = { + 'zone': list_missing_attributes_zone, + 'surroundings': list_missing_attributes_surroundings, + 'building-properties': list_missing_files_csv_building_properties, + 'air_conditioning': list_missing_columns_air_conditioning, + 'architecture': list_missing_columns_architecture, + 'indoor_comfort': list_missing_columns_indoor_comfort, + 'internal_loads': list_missing_columns_internal_loads, + 'supply_systems': list_missing_columns_supply_systems, + 'terrain': list_missing_files_terrain, + 'weather': list_missing_files_weather, + 'streets': list_missing_files_streets, + 'db': list_missing_files_db + } + + if all(not value for value in dict_missing.values()): + print('For Scenario: {scenario},'.format(scenario=scenario_name), + 'input building-geometries ShapeFiles: [zone.shp and surroundings.shp], ' + 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES), + 'are all verified as present and compatible with the current version of CEA-4.' + ) + + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + locator = cea.inputlocator.InputLocator(scenario=config.scenario) + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + # Execute the verification + dict_missing = cea4_verify(scenario=config.scenario) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('The entire process of CEA-4 format verification is now completed - time elapsed: %d.2 seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) From 6f0807d053d451696192ceca95d17b1ad41b794a Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sat, 11 Jan 2025 13:48:50 +0100 Subject: [PATCH 06/29] before editing the migration script --- cea/datamanagement/format_helper/cea4_verify.py | 2 +- cea/default.config | 6 +++--- cea/scripts.yml | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index f89ee0755e..5cbd98bf35 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -279,7 +279,7 @@ def cea4_verify(scenario): if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - + #3. verify if terrain.tif, weather.epw and streets.shp exist list_missing_files_terrain = verify_file_exists(scenario, ['terrain']) if list_missing_files_terrain: diff --git a/cea/default.config b/cea/default.config index 7aed8cb63f..50fc46ada4 100644 --- a/cea/default.config +++ b/cea/default.config @@ -360,9 +360,9 @@ operational.type = BooleanParameter operational.help = Estimate Operational costs (variable and fixed) due to supply systems operation (allocated according to the supply-systems input database) [emissions] -year-to-calculate = 2024 +year-to-calculate = 2025 year-to-calculate.type = IntegerParameter -year-to-calculate.help = Defines the time period (year-to-calculate - year_built or year_renovated) over which embodied emissions are calculated. If greater than 60 years, embodied emissions are not considered. +year-to-calculate.help = Defines the year to calculate the emissions. If a building is greater than 60 years, embodied emissions are not considered. embodied = true embodied.type = BooleanParameter @@ -617,7 +617,7 @@ scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and mig migrate-from-cea-3 = true migrate-from-cea-3.type = BooleanParameter -migrate-from-cea-3.help = True if migrate the inputs' format from CEA-3 to the current version of CEA-4. The process of migration is not reversible. +migrate-from-cea-3.help = True to auto-migrate the input format to the current version of CEA-4. The process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = diff --git a/cea/scripts.yml b/cea/scripts.yml index 721b0ec923..bda3e42c4c 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -359,9 +359,10 @@ Utilities: - name: cea4-format-helper label: CEA-4 Format Helper - description: "Verifies the inputs are in the correct format for CEA-4. - Migrates Late-CEA-3 inputs when detected - Note the process of migration is not reversible. - " + description: | + Verifies the inputs are in the correct format for CEA-4. + Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + interfaces: [cli, dashboard] module: cea.datamanagement.format_helper.format_helper parameters: ['general:scenario',format-helper] From cbd6b16349a781c22289c2c424ff445bb93b39a4 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sat, 11 Jan 2025 15:33:26 +0100 Subject: [PATCH 07/29] zone+typology --- .../format_helper/cea4_migrate.py | 99 +++++++++++++++++++ .../format_helper/cea4_verify.py | 37 +++---- .../format_helper/format_helper.py | 1 + 3 files changed, 119 insertions(+), 18 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index de7f983044..89f9729519 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -7,6 +7,7 @@ import os import cea.config import time +import pandas as pd import geopandas as gpd @@ -18,3 +19,101 @@ __maintainer__ = "Reynold Mok" __email__ = "cea@arch.ethz.ch" __status__ = "Production" + +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_file_exists, verify_csv +from cea.utilities.dbf import dbf_to_dataframe + + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files for CEA-3 +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. +# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. +def path_to_input_file_without_db_3(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.dbf") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.dbf") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.dbf") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.dbf") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.dbf") + elif item == "typology": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "typology.dbf") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + + return path_to_input_file + + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + + +## -------------------------------------------------------------------------------------------------------------------- +## Migrate to CEA-4 format from CEA-3 format +## -------------------------------------------------------------------------------------------------------------------- + +def migrate_cea3_to_cea4(scenario): + + # Create the list of items that has been changed from CEA-3 to CEA-4 + list_items_changed = ['zone', 'surroundings', + 'air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', + 'typology'] + dict_missing = cea4_verify(scenario) + + #0. get the scenario name + scenario_name = os.path.basename(scenario) + + #1. about zone.shp and surroundings.shp + COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] + COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] + COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] + columns_mapping_dict_name = {'Name': 'name'} + columns_mapping_dict_typology = {'YEAR': 'year', + 'STANDARD': 'const_type', + '1ST_USE': 'use_type1', + '1ST_USE_R': 'use_type1r', + '2ND_USE': 'use_type2', + '2ND_USE_R': 'use_type2r', + '3RD_USE': 'use_type3', + '3RD_USE_R': 'use_type3r' + } + + list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') + list_missing_files_typology = verify_file_exists(scenario, ['typology']) + list_missing_attributes_zone_4 = dict_missing.get('zone') + + if 'zone' not in list_missing_files_shp_building_geometry: + list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) + if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') + zone_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) + zone_df.rename(columns=columns_mapping_dict_name, inplace=True) + if 'typology' not in list_missing_files_typology: + list_missing_attributes_typology_3 = verify_csv(scenario, 'typology', COLUMNS_TYPOLOGY_3) + if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') + typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) + zone_df_4 = pd.merge(zone_df, typology_df, left_on=['name'], right_on=["Name"], how='left') + zone_df_4.drop(columns=['Name'], inplace=True) + typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + else: + raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration.') + elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-4 format.') + else: + raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with migration.') diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 5cbd98bf35..d23c5929c8 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -22,12 +22,12 @@ ## -------------------------------------------------------------------------------------------------------------------- -## The paths to the input files +## The paths to the input files for CEA-4 ## -------------------------------------------------------------------------------------------------------------------- # The paths are relatively hardcoded for now without using the inputlocator script. # This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. -def path_to_input_file_without_db(scenario, item): +def path_to_input_file_without_db_4(scenario, item): if item == "zone": path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") @@ -44,7 +44,7 @@ def path_to_input_file_without_db(scenario, item): elif item == "supply_systems": path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.csv") elif item == 'streets': - path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.csv") + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") elif item == 'terrain': path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") elif item == 'weather': @@ -70,7 +70,7 @@ def verify_shp(scenario, item, required_attributes): A list of missing attributes, or an empty list if all attributes are present. """ # Construct the shapefile path - shapefile_path = path_to_input_file_without_db(scenario, item) + shapefile_path = path_to_input_file_without_db_4(scenario, item) # Check if the shapefile exists if not os.path.isfile(shapefile_path): @@ -104,7 +104,7 @@ def verify_csv(scenario, item, required_columns): A list of missing columns, or an empty list if all columns are present. """ # Construct the CSV file path - csv_path = path_to_input_file_without_db(scenario, item) + csv_path = path_to_input_file_without_db_4(scenario, item) # Check if the CSV file exists if not os.path.isfile(csv_path): @@ -138,7 +138,7 @@ def verify_file_exists(scenario, items): """ list_missing_files = [] for file in items: - path = path_to_input_file_without_db(scenario, file) + path = path_to_input_file_without_db_4(scenario, file) if not os.path.isfile(path): list_missing_files.append(file) return list_missing_files @@ -155,7 +155,7 @@ def verify_name_duplicates(scenario, item): list: A list of duplicate names, or an empty list if no duplicates are found. """ # Construct the CSV file path - file_path = path_to_input_file_without_db(scenario, item) + file_path = path_to_input_file_without_db_4(scenario, item) # Check file type and load as a DataFrame if file_path.endswith('.csv'): @@ -188,26 +188,26 @@ def cea4_verify(scenario): #1. about zone.shp and surroundings.shp SHAPEFILES = ['zone', 'surroundings'] - COLUMNS_ZONE = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', + COLUMNS_ZONE_4 = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] - COLUMNS_SURROUNDINGS = ['name', 'height_ag', 'floors_ag'] + COLUMNS_SURROUNDINGS_4 = ['name', 'height_ag', 'floors_ag'] list_missing_attributes_zone = [] list_missing_attributes_surroundings = [] - list_missing_files_shp_building_geometries = verify_file_exists(scenario, SHAPEFILES) - if list_missing_files_shp_building_geometries: + list_missing_files_shp_building_geometry = verify_file_exists(scenario, SHAPEFILES) + if list_missing_files_shp_building_geometry: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometries)) - if 'zone' not in list_missing_files_shp_building_geometries: - list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE) + if 'zone' not in list_missing_files_shp_building_geometry: + list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) if 'name' in list_missing_attributes_zone: list_names_duplicated = verify_name_duplicates(scenario, 'zone') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - if 'surroundings' not in list_missing_files_shp_building_geometries: - list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS) + if 'surroundings' not in list_missing_files_shp_building_geometry: + list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) if list_missing_attributes_surroundings: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) @@ -291,13 +291,14 @@ def cea4_verify(scenario): list_missing_files_streets = verify_file_exists(scenario, ['streets']) if list_missing_files_streets: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, when thermal networks analysis is required.') + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') #4. verify the DB under the "inputs/technology/" folder list_missing_files_db = [] # Compile the results dict_missing = { + 'building-geometry': list_missing_files_shp_building_geometry, 'zone': list_missing_attributes_zone, 'surroundings': list_missing_attributes_surroundings, 'building-properties': list_missing_files_csv_building_properties, @@ -319,6 +320,7 @@ def cea4_verify(scenario): 'are all verified as present and compatible with the current version of CEA-4.' ) + return dict_missing ## -------------------------------------------------------------------------------------------------------------------- @@ -329,11 +331,10 @@ def cea4_verify(scenario): def main(config): # Start the timer t0 = time.perf_counter() - locator = cea.inputlocator.InputLocator(scenario=config.scenario) assert os.path.exists(config.general.project), 'input file not found: %s' % config.project # Execute the verification - dict_missing = cea4_verify(scenario=config.scenario) + cea4_verify(scenario=config.scenario) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 4a8f8135e4..1458b834ea 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -18,3 +18,4 @@ __maintainer__ = "Reynold Mok" __email__ = "cea@arch.ethz.ch" __status__ = "Production" + From 4dde9ee6fecb5a6c48dd2d70fd272bc54a3bbf61 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sat, 11 Jan 2025 22:49:19 +0100 Subject: [PATCH 08/29] typology merged to zone --- .../format_helper/cea4_migrate.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 89f9729519..b6bb33ab96 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -61,6 +61,30 @@ def path_to_input_file_without_db_3(scenario, item): ## -------------------------------------------------------------------------------------------------------------------- ## Helper functions ## -------------------------------------------------------------------------------------------------------------------- +def merge_then_replace_shapefile_dbf(scenario, item, new_dataframe): + """ + Replace the DBF file of a shapefile with the contents of a new DataFrame, + ensuring matching of `['Name']` in the shapefile and `['name']` in the new DataFrame. + + :param shapefile_path: Path to the shapefile (without file extension). + :param new_dataframe: pandas DataFrame with the new data to replace the DBF file. + """ + # Load the original shapefile + shapefile_path = path_to_input_file_without_db_3(scenario, item) + gdf = gpd.read_file(shapefile_path + ".shp") + + # Perform an inner join to match rows based on ['Name'] and ['name'] + merged = gdf.merge(new_dataframe, how='outer', left_on='Name', right_on='name') + + # Ensure all geometries are preserved + if len(merged) != len(gdf): + raise ValueError("Not all rows in the GeoDataFrame have a matching entry in the new DataFrame.") + + # Drop duplicate or unnecessary columns, keeping only the new attributes + new_gdf = merged.drop(columns=['Name'], errors='ignore') + + # Save the updated shapefile + new_gdf.to_file(shapefile_path, driver="ESRI Shapefile") ## -------------------------------------------------------------------------------------------------------------------- @@ -111,6 +135,12 @@ def migrate_cea3_to_cea4(scenario): zone_df_4 = pd.merge(zone_df, typology_df, left_on=['name'], right_on=["Name"], how='left') zone_df_4.drop(columns=['Name'], inplace=True) typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + + # Merge, replace, and remove. + merge_then_replace_shapefile_dbf(scenario, 'zone', typology_df) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + os.remove(path_to_input_file_without_db_3(scenario, 'typology')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp has been removed.') else: raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration.') elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: From b371319ec2a9da4c119addb23f4404abaa159770 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sat, 11 Jan 2025 23:12:17 +0100 Subject: [PATCH 09/29] surroundings --- .../format_helper/cea4_migrate.py | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index b6bb33ab96..7c7f21c1bb 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -61,7 +61,7 @@ def path_to_input_file_without_db_3(scenario, item): ## -------------------------------------------------------------------------------------------------------------------- ## Helper functions ## -------------------------------------------------------------------------------------------------------------------- -def merge_then_replace_shapefile_dbf(scenario, item, new_dataframe): +def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3): """ Replace the DBF file of a shapefile with the contents of a new DataFrame, ensuring matching of `['Name']` in the shapefile and `['name']` in the new DataFrame. @@ -71,7 +71,9 @@ def merge_then_replace_shapefile_dbf(scenario, item, new_dataframe): """ # Load the original shapefile shapefile_path = path_to_input_file_without_db_3(scenario, item) - gdf = gpd.read_file(shapefile_path + ".shp") + gdf = gpd.read_file(shapefile_path) + list_attributes_3_without_name = [item for item in list_attributes_3 if item != 'name'] + gdf = gdf.drop(columns=list_attributes_3_without_name, errors='ignore') # Perform an inner join to match rows based on ['Name'] and ['name'] merged = gdf.merge(new_dataframe, how='outer', left_on='Name', right_on='name') @@ -120,30 +122,44 @@ def migrate_cea3_to_cea4(scenario): list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') list_missing_files_typology = verify_file_exists(scenario, ['typology']) list_missing_attributes_zone_4 = dict_missing.get('zone') + list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') - zone_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) - zone_df.rename(columns=columns_mapping_dict_name, inplace=True) + zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) + zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) if 'typology' not in list_missing_files_typology: list_missing_attributes_typology_3 = verify_csv(scenario, 'typology', COLUMNS_TYPOLOGY_3) if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) - zone_df_4 = pd.merge(zone_df, typology_df, left_on=['name'], right_on=["Name"], how='left') - zone_df_4.drop(columns=['Name'], inplace=True) typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') + zone_df_4.drop(columns=['Name'], inplace=True) - # Merge, replace, and remove. - merge_then_replace_shapefile_dbf(scenario, 'zone', typology_df) + # Replace, and remove. + replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') os.remove(path_to_input_file_without_db_3(scenario, 'typology')) print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp has been removed.') else: raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration.') elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-4 format.') + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') + else: + raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.') + + if 'surroundings' not in list_missing_files_shp_building_geometry: + list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) + if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') + surroundings_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) + surroundings_df_4 = surroundings_df_3.rename(columns=columns_mapping_dict_name, inplace=True) + replace_shapefile_dbf(scenario, 'surroundings', surroundings_df_4, COLUMNS_SURROUNDINGS_3) + + elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') else: - raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with migration.') + raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.') From 9553b441f5aa06f177b9b2648662bd43ebd70a3b Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sat, 11 Jan 2025 23:55:02 +0100 Subject: [PATCH 10/29] dbf to csv --- .../format_helper/cea4_migrate.py | 180 ++++++++++++++++-- .../format_helper/cea4_verify.py | 26 +-- .../format_helper/format_helper.py | 48 +++++ cea/utilities/batch_process_workflow.py | 2 +- 4 files changed, 229 insertions(+), 27 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 7c7f21c1bb..7cbbfd7b68 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -20,7 +20,8 @@ __email__ = "cea@arch.ethz.ch" __status__ = "Production" -from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_file_exists, verify_csv +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_file_exists, verify_csv, \ + verify_name_duplicates from cea.utilities.dbf import dbf_to_dataframe @@ -93,6 +94,40 @@ def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3): ## Migrate to CEA-4 format from CEA-3 format ## -------------------------------------------------------------------------------------------------------------------- +def verify_dbf(scenario, item, required_attributes): + """ + Verify if a shapefile contains all required attributes. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Either "zone" or "surroundings". + required_attributes (list): List of attribute names to verify. + + Returns: + A list of missing attributes, or an empty list if all attributes are present. + """ + # Construct the shapefile path + dbf_path = path_to_input_file_without_db_3(scenario, item) + + # Check if the shapefile exists + if not os.path.isfile(dbf_path): + raise FileNotFoundError(f"Shapefile not found: {dbf_path}") + + # Load the shapefile + try: + df = dbf_to_dataframe(dbf_path) + except Exception as e: + raise ValueError(f"Error reading .dbf file: {e}") + + # Get the column names from the shapefile's attribute table + dbf_columns = df.columns.tolist() + + # Check for missing attributes + missing_attributes = [attr for attr in required_attributes if attr not in dbf_columns] + + return missing_attributes + + def migrate_cea3_to_cea4(scenario): # Create the list of items that has been changed from CEA-3 to CEA-4 @@ -104,10 +139,25 @@ def migrate_cea3_to_cea4(scenario): #0. get the scenario name scenario_name = os.path.basename(scenario) - #1. about zone.shp and surroundings.shp COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] + CSV_BUILDING_PROPERTIES_3 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', 'typology'] + COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] + COLUMNS_AIR_CONDITIONING_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] + COLUMNS_ARCHITECTURE_3 = ['Name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] + COLUMNS_INDOOR_COMFORT_3 = ['Name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] + COLUMNS_INTERNAL_LOADS_3 = ['Name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] + COLUMNS_SUPPLY_SYSTEMS_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] columns_mapping_dict_name = {'Name': 'name'} columns_mapping_dict_typology = {'YEAR': 'year', 'STANDARD': 'const_type', @@ -118,19 +168,24 @@ def migrate_cea3_to_cea4(scenario): '3RD_USE': 'use_type3', '3RD_USE_R': 'use_type3r' } - + # Verify missing files for CEA-3 format list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') - list_missing_files_typology = verify_file_exists(scenario, ['typology']) + list_missing_files_dbf_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES_3) + if list_missing_files_dbf_building_properties: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + + # Verify missing attributes/columns for CEA-4 format list_missing_attributes_zone_4 = dict_missing.get('zone') list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') + #1. about zone.shp and surroundings.shp if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) - if 'typology' not in list_missing_files_typology: + if 'typology' not in list_missing_files_dbf_building_properties: list_missing_attributes_typology_3 = verify_csv(scenario, 'typology', COLUMNS_TYPOLOGY_3) if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') @@ -138,13 +193,9 @@ def migrate_cea3_to_cea4(scenario): typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') zone_df_4.drop(columns=['Name'], inplace=True) - - # Replace, and remove. replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') - os.remove(path_to_input_file_without_db_3(scenario, 'typology')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp has been removed.') - else: + else: raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration.') elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') @@ -155,11 +206,114 @@ def migrate_cea3_to_cea4(scenario): list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') - surroundings_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) - surroundings_df_4 = surroundings_df_3.rename(columns=columns_mapping_dict_name, inplace=True) - replace_shapefile_dbf(scenario, 'surroundings', surroundings_df_4, COLUMNS_SURROUNDINGS_3) + surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) + surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) + replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') else: raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.') + + #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files + if 'air_conditioning' not in list_missing_files_dbf_building_properties: + list_missing_columns_air_conditioning = verify_dbf(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) + if list_missing_columns_air_conditioning: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + if 'Name' in list_missing_columns_air_conditioning: + list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + air_conditioning_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'air_conditioning')) + air_conditioning_df.rename(columns=columns_mapping_dict_typology, inplace=True) + os.remove(path_to_input_file_without_db_3(scenario, 'air_conditioning')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 air_conditioning.dbf has been migrated to CEA-4 format.') + + if 'architecture' not in list_missing_files_dbf_building_properties: + list_missing_columns_architecture = verify_dbf(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) + if list_missing_columns_architecture: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.dbf: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + if 'Name' in list_missing_columns_architecture: + list_names_duplicated = verify_name_duplicates(scenario, 'architecture') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + architecture_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'architecture')) + architecture_df.rename(columns=columns_mapping_dict_typology, inplace=True) + os.remove(path_to_input_file_without_db_3(scenario, 'architecture')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 architecture.dbf has been migrated to CEA-4 format.') + + if 'indoor_comfort' not in list_missing_files_dbf_building_properties: + list_missing_columns_indoor_comfort = verify_dbf(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + if list_missing_columns_indoor_comfort: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + if 'Name' in list_missing_columns_indoor_comfort: + list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + indoor_comfort_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) + indoor_comfort_df.rename(columns=columns_mapping_dict_typology, inplace=True) + os.remove(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 indoor_comfort.dbf has been migrated to CEA-4 format.') + + if 'internal_loads' not in list_missing_files_dbf_building_properties: + list_missing_columns_internal_loads = verify_dbf(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + if list_missing_columns_internal_loads: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.dbf: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + if 'Name' in list_missing_columns_internal_loads: + list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + internal_loads_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'internal_loads')) + internal_loads_df.rename(columns=columns_mapping_dict_typology, inplace=True) + os.remove(path_to_input_file_without_db_3(scenario, 'internal_loads')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 internal_loads.dbf has been migrated to CEA-4 format.') + + if 'supply_systems' not in list_missing_files_dbf_building_properties: + list_missing_columns_supply_systems = verify_dbf(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + if list_missing_columns_supply_systems: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.dbf: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + if 'Name' in list_missing_columns_supply_systems: + list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') + if list_names_duplicated: + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + supply_systems_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'supply_systems')) + supply_systems_df.rename(columns=columns_mapping_dict_typology, inplace=True) + os.remove(path_to_input_file_without_db_3(scenario, 'supply_systems')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 supply_systems.dbf has been migrated to CEA-4 format.') + + if 'typology' not in list_missing_files_dbf_building_properties: + os.remove(path_to_input_file_without_db_3(scenario, 'typology')) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 typology.dbf has been removed as it is no longer needed by CEA-4.') + + #3. about the Database + + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + # Execute the verification + migrate_cea3_to_cea4(scenario=config.scenario) + + # Execute the verification + print("A final step to verify if all the data is in the correct format for CEA-4.") + cea4_verify(scenario=config.scenario) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('The entire process of data migration from CEA-3 to CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index d23c5929c8..5df0491ad9 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -197,7 +197,7 @@ def cea4_verify(scenario): list_missing_files_shp_building_geometry = verify_file_exists(scenario, SHAPEFILES) if list_missing_files_shp_building_geometry: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometries)) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometry)) if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: @@ -212,20 +212,20 @@ def cea4_verify(scenario): print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) #2. about .csv files under the "inputs/building-properties" folder - CSV_BUILDING_PROPERTIES = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] - COLUMNS_AIR_CONDITIONING = ['name', + CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] + COLUMNS_AIR_CONDITIONING_4 = ['name', 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] - COLUMNS_ARCHITECTURE = ['name', + COLUMNS_ARCHITECTURE_4 = ['name', 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', 'type_win', 'type_shade'] - COLUMNS_INDOOR_COMFORT = ['name', + COLUMNS_INDOOR_COMFORT_4 = ['name', 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] - COLUMNS_INTERNAL_LOADS = ['name', + COLUMNS_INTERNAL_LOADS_4 = ['name', 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] - COLUMNS_SUPPLY_SYSTEMS = ['name', + COLUMNS_SUPPLY_SYSTEMS_4 = ['name', 'type_cs', 'type_hs', 'type_dhw', 'type_el'] list_missing_columns_air_conditioning = [] @@ -234,12 +234,12 @@ def cea4_verify(scenario): list_missing_columns_internal_loads = [] list_missing_columns_supply_systems = [] - list_missing_files_csv_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES) + list_missing_files_csv_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES_4) if list_missing_files_csv_building_properties: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) if 'air_conditioning' not in list_missing_files_csv_building_properties: - list_missing_columns_air_conditioning = verify_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING) + list_missing_columns_air_conditioning = verify_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) if list_missing_columns_air_conditioning: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) if 'name' in list_missing_columns_air_conditioning: @@ -247,7 +247,7 @@ def cea4_verify(scenario): if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'architecture' not in list_missing_files_csv_building_properties: - list_missing_columns_architecture = verify_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE) + list_missing_columns_architecture = verify_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) if list_missing_columns_architecture: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) if 'name' in list_missing_columns_architecture: @@ -255,7 +255,7 @@ def cea4_verify(scenario): if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'indoor_comfort' not in list_missing_files_csv_building_properties: - list_missing_columns_indoor_comfort = verify_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT) + list_missing_columns_indoor_comfort = verify_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) if list_missing_columns_indoor_comfort: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) if 'name' in list_missing_columns_indoor_comfort: @@ -263,7 +263,7 @@ def cea4_verify(scenario): if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'internal_loads' not in list_missing_files_csv_building_properties: - list_missing_columns_internal_loads = verify_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS) + list_missing_columns_internal_loads = verify_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) if list_missing_columns_internal_loads: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) if 'name' in list_missing_columns_internal_loads: @@ -271,7 +271,7 @@ def cea4_verify(scenario): if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'supply_systems' not in list_missing_files_csv_building_properties: - list_missing_columns_supply_systems = verify_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS) + list_missing_columns_supply_systems = verify_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) if list_missing_columns_supply_systems: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) if 'name' in list_missing_columns_supply_systems: diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 1458b834ea..f8328b7d90 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -19,3 +19,51 @@ __email__ = "cea@arch.ethz.ch" __status__ = "Production" + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + +def main(config): + """ + Batch processing all selected scenarios under a project. + + :param config: the configuration object to use + :type config: cea.config.Configuration + :return: + """ + + # Start the timer + t0 = time.perf_counter() + + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + project_path = config.general.project + scenario_name = config.general.scenario_name + scenarios_list = config.batch_process_workflow.scenarios_to_simulate + + # Loop over one or all scenarios under the project + for scenario in scenarios_list: + # Ignore hidden directories + if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): + continue + + cea_scenario = os.path.join(project_path, scenario) + print(f'Executing CEA simulations on {cea_scenario}.') + try: + # executing CEA commands + exec_cea_commands(config, cea_scenario) + except subprocess.CalledProcessError as e: + print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}") + err_msg = e.stderr + if err_msg is not None: + print(err_msg.decode()) + raise e + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('The entire batch processing sequence is now completed - time elapsed: %d.2 seconds' % time_elapsed) + + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/utilities/batch_process_workflow.py b/cea/utilities/batch_process_workflow.py index 9e2dc78625..1e14b3f69e 100644 --- a/cea/utilities/batch_process_workflow.py +++ b/cea/utilities/batch_process_workflow.py @@ -156,7 +156,7 @@ def exec_cea_commands(config, cea_scenario): def main(config): """ - Batch processing all scenarios under a project. + Batch processing all selectedscenarios under a project. :param config: the configuration object to use :type config: cea.config.Configuration From 8f776e823ce4de8cd4e1257e81c3d352357e7a5d Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 00:12:01 +0100 Subject: [PATCH 11/29] batch process --- .../format_helper/format_helper.py | 44 +++++++++++++++---- cea/scripts.yml | 18 ++++++++ cea/utilities/batch_process_workflow.py | 2 +- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index f8328b7d90..8c134635e1 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -3,12 +3,12 @@ """ -import cea.inputlocator +import cea.config import os +import subprocess +import sys import cea.config import time -import geopandas as gpd - __author__ = "Zhongming Shi" __copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" @@ -20,6 +20,27 @@ __status__ = "Production" +## -------------------------------------------------------------------------------------------------------------------- +## Get the environment and set up the subprocess +## -------------------------------------------------------------------------------------------------------------------- + + +# adding CEA to the environment +# Fix for running in PyCharm for users using micromamba +my_env = os.environ.copy() +my_env['PATH'] = f"{os.path.dirname(sys.executable)}:{my_env['PATH']}" + +def exec_cea_format_helper(config, cea_scenario): + # auto-migrate from CEA-3 to CEA-4 + bool_migrate = config.format_helper.migrate_from_cea_3 + if bool_migrate: + subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True, + capture_output=True) + else: + subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, + capture_output=True) + + ## -------------------------------------------------------------------------------------------------------------------- ## Main function ## -------------------------------------------------------------------------------------------------------------------- @@ -40,21 +61,26 @@ def main(config): project_path = config.general.project scenario_name = config.general.scenario_name - scenarios_list = config.batch_process_workflow.scenarios_to_simulate + scenarios_list = config.format_helper.scenarios_to_verify_and_migrate + bool_migrate = config.format_helper.migrate_from_cea_3 - # Loop over one or all scenarios under the project + # Loop over one or all selected scenarios under the project for scenario in scenarios_list: # Ignore hidden directories if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): continue cea_scenario = os.path.join(project_path, scenario) - print(f'Executing CEA simulations on {cea_scenario}.') + if bool_migrate: + print(f'Executing CEA input data verification and migration on {cea_scenario}.') + else: + print(f'Executing CEA input data verification on {cea_scenario}.') + try: # executing CEA commands - exec_cea_commands(config, cea_scenario) + exec_cea_format_helper(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}") + print(f"CEA input data verification and migration failed at `{scenario_name}`") err_msg = e.stderr if err_msg is not None: print(err_msg.decode()) @@ -62,7 +88,7 @@ def main(config): # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 - print('The entire batch processing sequence is now completed - time elapsed: %d.2 seconds' % time_elapsed) + print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) if __name__ == '__main__': diff --git a/cea/scripts.yml b/cea/scripts.yml index bda3e42c4c..fe5ad19938 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -367,6 +367,24 @@ Utilities: module: cea.datamanagement.format_helper.format_helper parameters: ['general:scenario',format-helper] + - name: cea4-migrate + label: CEA-4 Format Helper - Migrate + description: | + Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_migrate + parameters: ['general:scenario'] + + - name: cea4-verify + label: CEA-4 Format Helper - Verify + description: | + Verifies the inputs are in the correct format for CEA-4. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_verify + parameters: ['general:scenario'] + - name: sensitivity-analysis-sampler label: Generate Samples for Sensitivity Analysis (SA) description: Generate samples for sensitivity analysis using Sobol Method. diff --git a/cea/utilities/batch_process_workflow.py b/cea/utilities/batch_process_workflow.py index 1e14b3f69e..4de7772b49 100644 --- a/cea/utilities/batch_process_workflow.py +++ b/cea/utilities/batch_process_workflow.py @@ -172,7 +172,7 @@ def main(config): scenario_name = config.general.scenario_name scenarios_list = config.batch_process_workflow.scenarios_to_simulate - # Loop over one or all scenarios under the project + # Loop over one or all selected scenarios under the project for scenario in scenarios_list: # Ignore hidden directories if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): From 3fdb9ba827ab94212b8fa5371bd6bddaee24bc32 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 00:13:55 +0100 Subject: [PATCH 12/29] error message --- cea/datamanagement/format_helper/format_helper.py | 2 +- cea/utilities/batch_process_workflow.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 8c134635e1..3a736cddef 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -80,7 +80,7 @@ def main(config): # executing CEA commands exec_cea_format_helper(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA input data verification and migration failed at `{scenario_name}`") + print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") err_msg = e.stderr if err_msg is not None: print(err_msg.decode()) diff --git a/cea/utilities/batch_process_workflow.py b/cea/utilities/batch_process_workflow.py index 4de7772b49..95523e246c 100644 --- a/cea/utilities/batch_process_workflow.py +++ b/cea/utilities/batch_process_workflow.py @@ -184,7 +184,7 @@ def main(config): # executing CEA commands exec_cea_commands(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}") + print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") err_msg = e.stderr if err_msg is not None: print(err_msg.decode()) From bb09b4adb3e4e9c5045d4916af231f57bb5060f4 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 00:23:15 +0100 Subject: [PATCH 13/29] bug fix 1 --- .../format_helper/cea4_migrate.py | 15 ++++--- .../format_helper/cea4_verify.py | 43 +++++++++++-------- 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 7cbbfd7b68..f7c853dd0f 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -220,7 +220,8 @@ def migrate_cea3_to_cea4(scenario): list_missing_columns_air_conditioning = verify_dbf(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) if list_missing_columns_air_conditioning: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) - if 'Name' in list_missing_columns_air_conditioning: + else: + if 'Name' not in list_missing_columns_air_conditioning: list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) @@ -234,7 +235,8 @@ def migrate_cea3_to_cea4(scenario): list_missing_columns_architecture = verify_dbf(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) if list_missing_columns_architecture: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.dbf: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) - if 'Name' in list_missing_columns_architecture: + else: + if 'Name' not in list_missing_columns_architecture: list_names_duplicated = verify_name_duplicates(scenario, 'architecture') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) @@ -248,7 +250,8 @@ def migrate_cea3_to_cea4(scenario): list_missing_columns_indoor_comfort = verify_dbf(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) if list_missing_columns_indoor_comfort: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) - if 'Name' in list_missing_columns_indoor_comfort: + else: + if 'Name' not in list_missing_columns_indoor_comfort: list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) @@ -262,7 +265,8 @@ def migrate_cea3_to_cea4(scenario): list_missing_columns_internal_loads = verify_dbf(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) if list_missing_columns_internal_loads: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.dbf: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) - if 'Name' in list_missing_columns_internal_loads: + else: + if 'Name' not in list_missing_columns_internal_loads: list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) @@ -276,7 +280,8 @@ def migrate_cea3_to_cea4(scenario): list_missing_columns_supply_systems = verify_dbf(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) if list_missing_columns_supply_systems: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.dbf: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) - if 'Name' in list_missing_columns_supply_systems: + else: + if 'Name' not in list_missing_columns_supply_systems: list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 5df0491ad9..4f4fcc6ddd 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -197,19 +197,19 @@ def cea4_verify(scenario): list_missing_files_shp_building_geometry = verify_file_exists(scenario, SHAPEFILES) if list_missing_files_shp_building_geometry: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometry)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometry)) if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) if 'name' in list_missing_attributes_zone: list_names_duplicated = verify_name_duplicates(scenario, 'zone') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'surroundings' not in list_missing_files_shp_building_geometry: list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) if list_missing_attributes_surroundings: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) #2. about .csv files under the "inputs/building-properties" folder CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] @@ -236,48 +236,53 @@ def cea4_verify(scenario): list_missing_files_csv_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES_4) if list_missing_files_csv_building_properties: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) if 'air_conditioning' not in list_missing_files_csv_building_properties: list_missing_columns_air_conditioning = verify_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) if list_missing_columns_air_conditioning: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) - if 'name' in list_missing_columns_air_conditioning: + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + else: + if 'name' not in list_missing_columns_air_conditioning: list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'architecture' not in list_missing_files_csv_building_properties: list_missing_columns_architecture = verify_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) if list_missing_columns_architecture: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) - if 'name' in list_missing_columns_architecture: + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + else: + if 'name' not in list_missing_columns_architecture: list_names_duplicated = verify_name_duplicates(scenario, 'architecture') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'indoor_comfort' not in list_missing_files_csv_building_properties: list_missing_columns_indoor_comfort = verify_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) if list_missing_columns_indoor_comfort: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) - if 'name' in list_missing_columns_indoor_comfort: + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + else: + if 'name' not in list_missing_columns_indoor_comfort: list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') if list_names_duplicated: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'internal_loads' not in list_missing_files_csv_building_properties: list_missing_columns_internal_loads = verify_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) if list_missing_columns_internal_loads: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) - if 'name' in list_missing_columns_internal_loads: + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + else: + if 'name' not in list_missing_columns_internal_loads: list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'supply_systems' not in list_missing_files_csv_building_properties: list_missing_columns_supply_systems = verify_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) if list_missing_columns_supply_systems: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) - if 'name' in list_missing_columns_supply_systems: + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + else: + if 'name' not in list_missing_columns_supply_systems: list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #3. verify if terrain.tif, weather.epw and streets.shp exist From ab288191d3521179ac4419e7ab4431082450fd2a Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 11:44:33 +0100 Subject: [PATCH 14/29] bug fix --- .../format_helper/cea4_migrate.py | 27 ++++++++++++++++--- .../format_helper/cea4_verify.py | 18 +++++++------ .../format_helper/format_helper.py | 2 +- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index f7c853dd0f..35f1eb3ca9 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -20,7 +20,7 @@ __email__ = "cea@arch.ethz.ch" __status__ = "Production" -from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_file_exists, verify_csv, \ +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_csv, \ verify_name_duplicates from cea.utilities.dbf import dbf_to_dataframe @@ -89,6 +89,23 @@ def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3): # Save the updated shapefile new_gdf.to_file(shapefile_path, driver="ESRI Shapefile") +def verify_file_exists_3(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. + items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db_3(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files ## -------------------------------------------------------------------------------------------------------------------- ## Migrate to CEA-4 format from CEA-3 format @@ -170,7 +187,7 @@ def migrate_cea3_to_cea4(scenario): } # Verify missing files for CEA-3 format list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') - list_missing_files_dbf_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES_3) + list_missing_files_dbf_building_properties = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) if list_missing_files_dbf_building_properties: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) @@ -195,8 +212,10 @@ def migrate_cea3_to_cea4(scenario): zone_df_4.drop(columns=['Name'], inplace=True) replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') - else: - raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration.') + else: + raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), + 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration. ' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3)) elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') else: diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 4f4fcc6ddd..fbadd9d9ad 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -49,6 +49,8 @@ def path_to_input_file_without_db_4(scenario, item): path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") elif item == 'weather': path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") return path_to_input_file @@ -125,7 +127,7 @@ def verify_csv(scenario, item, required_columns): return missing_columns -def verify_file_exists(scenario, items): +def verify_file_exists_4(scenario, items): """ Verify if the files in the provided list exist for a given scenario. @@ -195,7 +197,7 @@ def cea4_verify(scenario): list_missing_attributes_zone = [] list_missing_attributes_surroundings = [] - list_missing_files_shp_building_geometry = verify_file_exists(scenario, SHAPEFILES) + list_missing_files_shp_building_geometry = verify_file_exists_4(scenario, SHAPEFILES) if list_missing_files_shp_building_geometry: print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometry)) if 'zone' not in list_missing_files_shp_building_geometry: @@ -234,7 +236,7 @@ def cea4_verify(scenario): list_missing_columns_internal_loads = [] list_missing_columns_supply_systems = [] - list_missing_files_csv_building_properties = verify_file_exists(scenario, CSV_BUILDING_PROPERTIES_4) + list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) if list_missing_files_csv_building_properties: print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) @@ -286,15 +288,15 @@ def cea4_verify(scenario): #3. verify if terrain.tif, weather.epw and streets.shp exist - list_missing_files_terrain = verify_file_exists(scenario, ['terrain']) + list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) if list_missing_files_terrain: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') - list_missing_files_weather = verify_file_exists(scenario, ['weather']) + list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) if list_missing_files_weather: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') - list_missing_files_streets = verify_file_exists(scenario, ['streets']) + list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) if list_missing_files_streets: print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') @@ -320,8 +322,8 @@ def cea4_verify(scenario): if all(not value for value in dict_missing.values()): print('For Scenario: {scenario},'.format(scenario=scenario_name), - 'input building-geometries ShapeFiles: [zone.shp and surroundings.shp], ' - 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES), + 'input building-geometries ShapeFiles: [zone and surroundings], ' + 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), 'are all verified as present and compatible with the current version of CEA-4.' ) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 3a736cddef..fcade5d5ed 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -80,7 +80,7 @@ def main(config): # executing CEA commands exec_cea_format_helper(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") + print(f"CEA input data verification (and migration) for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") err_msg = e.stderr if err_msg is not None: print(err_msg.decode()) From a5ea4b70fa68d55d93583824ce8ae3b51e9228e0 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 17:00:11 +0100 Subject: [PATCH 15/29] before testing the batch processing --- .../format_helper/cea4_migrate.py | 507 ++++++++++-------- .../format_helper/cea4_verify.py | 163 +++--- .../format_helper/format_helper.py | 31 +- 3 files changed, 398 insertions(+), 303 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 35f1eb3ca9..7f436806e4 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -9,6 +9,7 @@ import time import pandas as pd import geopandas as gpd +import sys __author__ = "Zhongming Shi" @@ -20,10 +21,39 @@ __email__ = "cea@arch.ethz.ch" __status__ = "Production" -from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, verify_csv, \ - verify_name_duplicates +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, CSV_BUILDING_PROPERTIES_4, \ + COLUMNS_ZONE_4, print_verification_results_4, path_to_input_file_without_db_4 from cea.utilities.dbf import dbf_to_dataframe +COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] +CSV_BUILDING_PROPERTIES_3 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', 'typology'] + +COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] +COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] +COLUMNS_AIR_CONDITIONING_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_3 = ['Name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_3 = ['Name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_3 = ['Name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] +columns_mapping_dict_name = {'Name': 'name'} +columns_mapping_dict_typology = {'YEAR': 'year', + 'STANDARD': 'const_type', + '1ST_USE': 'use_type1', + '1ST_USE_R': 'use_type1r', + '2ND_USE': 'use_type2', + '2ND_USE_R': 'use_type2r', + '3RD_USE': 'use_type3', + '3RD_USE_R': 'use_type3r' + } ## -------------------------------------------------------------------------------------------------------------------- ## The paths to the input files for CEA-3 @@ -55,6 +85,8 @@ def path_to_input_file_without_db_3(scenario, item): path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") elif item == 'weather': path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") return path_to_input_file @@ -62,6 +94,74 @@ def path_to_input_file_without_db_3(scenario, item): ## -------------------------------------------------------------------------------------------------------------------- ## Helper functions ## -------------------------------------------------------------------------------------------------------------------- + + +def verify_name_duplicates_3(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). + + Returns: + list: A list of duplicate names, or an empty list if no duplicates are found. + """ + # Construct the CSV file path + file_path = path_to_input_file_without_db_3(scenario, item) + + # Check file type and load as a DataFrame + if file_path.endswith('.dbf'): + try: + df = dbf_to_dataframe(file_path) + except Exception as e: + raise ValueError(f"Error reading DBF file: {e}") + elif file_path.endswith('.shp'): + try: + df = gpd.read_file(file_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + else: + raise ValueError("Unsupported file type. Please provide a .csv or .shp file.") + + # Find duplicate names + list_names_duplicated = df['Name'][df['Name'].duplicated()].tolist() + + return list_names_duplicated + +def verify_dbf_3(scenario, item, required_columns): + """ + Verify if a DBF file contains all required columns. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Identifier for the CSV file. + required_columns (list): List of column names to verify. + + Returns: + A list of missing columns, or an empty list if all columns are present. + """ + # Construct the CSV file path + dbf_path = path_to_input_file_without_db_3(scenario, item) + + # Check if the CSV file exists + if not os.path.isfile(dbf_path): + raise FileNotFoundError(f"CSV file not found: {dbf_path}") + + # Load the CSV file + try: + df = dbf_to_dataframe(dbf_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + + # Get the column names from the CSV file + dbf_columns = df.columns.tolist() + + # Check for missing columns + missing_columns = [col for col in required_columns if col not in dbf_columns] + + return missing_columns + + def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3): """ Replace the DBF file of a shapefile with the contents of a new DataFrame, @@ -73,18 +173,9 @@ def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3): # Load the original shapefile shapefile_path = path_to_input_file_without_db_3(scenario, item) gdf = gpd.read_file(shapefile_path) - list_attributes_3_without_name = [item for item in list_attributes_3 if item != 'name'] - gdf = gdf.drop(columns=list_attributes_3_without_name, errors='ignore') - # Perform an inner join to match rows based on ['Name'] and ['name'] - merged = gdf.merge(new_dataframe, how='outer', left_on='Name', right_on='name') - - # Ensure all geometries are preserved - if len(merged) != len(gdf): - raise ValueError("Not all rows in the GeoDataFrame have a matching entry in the new DataFrame.") - - # Drop duplicate or unnecessary columns, keeping only the new attributes - new_gdf = merged.drop(columns=['Name'], errors='ignore') + # Convert the DataFrame to a GeoDataFrame + new_gdf = gpd.GeoDataFrame(new_dataframe, geometry=gdf['geometry'], crs=gdf.crs) # Replace CRS with your specific CRS # Save the updated shapefile new_gdf.to_file(shapefile_path, driver="ESRI Shapefile") @@ -107,216 +198,187 @@ def verify_file_exists_3(scenario, items): list_missing_files.append(file) return list_missing_files + ## -------------------------------------------------------------------------------------------------------------------- ## Migrate to CEA-4 format from CEA-3 format ## -------------------------------------------------------------------------------------------------------------------- -def verify_dbf(scenario, item, required_attributes): - """ - Verify if a shapefile contains all required attributes. - - Parameters: - scenario (str): Path or identifier for the scenario. - item (str): Either "zone" or "surroundings". - required_attributes (list): List of attribute names to verify. - - Returns: - A list of missing attributes, or an empty list if all attributes are present. - """ - # Construct the shapefile path - dbf_path = path_to_input_file_without_db_3(scenario, item) - - # Check if the shapefile exists - if not os.path.isfile(dbf_path): - raise FileNotFoundError(f"Shapefile not found: {dbf_path}") - - # Load the shapefile - try: - df = dbf_to_dataframe(dbf_path) - except Exception as e: - raise ValueError(f"Error reading .dbf file: {e}") - - # Get the column names from the shapefile's attribute table - dbf_columns = df.columns.tolist() - - # Check for missing attributes - missing_attributes = [attr for attr in required_attributes if attr not in dbf_columns] - - return missing_attributes - - def migrate_cea3_to_cea4(scenario): - # Create the list of items that has been changed from CEA-3 to CEA-4 - list_items_changed = ['zone', 'surroundings', - 'air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', - 'typology'] - dict_missing = cea4_verify(scenario) - - #0. get the scenario name + # Get the scenario name scenario_name = os.path.basename(scenario) - COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] - CSV_BUILDING_PROPERTIES_3 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', 'typology'] - - COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] - COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] - COLUMNS_AIR_CONDITIONING_3 = ['Name', - 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', - 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] - COLUMNS_ARCHITECTURE_3 = ['Name', - 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', - 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', - 'type_win', 'type_shade'] - COLUMNS_INDOOR_COMFORT_3 = ['Name', - 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] - COLUMNS_INTERNAL_LOADS_3 = ['Name', - 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', - 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] - COLUMNS_SUPPLY_SYSTEMS_3 = ['Name', - 'type_cs', 'type_hs', 'type_dhw', 'type_el'] - columns_mapping_dict_name = {'Name': 'name'} - columns_mapping_dict_typology = {'YEAR': 'year', - 'STANDARD': 'const_type', - '1ST_USE': 'use_type1', - '1ST_USE_R': 'use_type1r', - '2ND_USE': 'use_type2', - '2ND_USE_R': 'use_type2r', - '3RD_USE': 'use_type3', - '3RD_USE_R': 'use_type3r' - } - # Verify missing files for CEA-3 format - list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') - list_missing_files_dbf_building_properties = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) - if list_missing_files_dbf_building_properties: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) - - # Verify missing attributes/columns for CEA-4 format - list_missing_attributes_zone_4 = dict_missing.get('zone') - list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') - - #1. about zone.shp and surroundings.shp - if 'zone' not in list_missing_files_shp_building_geometry: - list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) - if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') - zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) - zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) - if 'typology' not in list_missing_files_dbf_building_properties: - list_missing_attributes_typology_3 = verify_csv(scenario, 'typology', COLUMNS_TYPOLOGY_3) - if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') - typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) - typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) - zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') - zone_df_4.drop(columns=['Name'], inplace=True) - replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') - else: - raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), - 'typology.shp does not follow the CEA-3 format. CEA cannot proceed with migration. ' - 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3)) - elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') - else: - raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.') - - if 'surroundings' not in list_missing_files_shp_building_geometry: - list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) - if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') - surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) - surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) - replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) - - elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') - else: - raise ValueError('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.') - - #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files - if 'air_conditioning' not in list_missing_files_dbf_building_properties: - list_missing_columns_air_conditioning = verify_dbf(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) - if list_missing_columns_air_conditioning: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) - else: - if 'Name' not in list_missing_columns_air_conditioning: - list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') - if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - air_conditioning_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'air_conditioning')) - air_conditioning_df.rename(columns=columns_mapping_dict_typology, inplace=True) - os.remove(path_to_input_file_without_db_3(scenario, 'air_conditioning')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 air_conditioning.dbf has been migrated to CEA-4 format.') - - if 'architecture' not in list_missing_files_dbf_building_properties: - list_missing_columns_architecture = verify_dbf(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) - if list_missing_columns_architecture: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.dbf: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) - else: - if 'Name' not in list_missing_columns_architecture: - list_names_duplicated = verify_name_duplicates(scenario, 'architecture') - if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - architecture_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'architecture')) - architecture_df.rename(columns=columns_mapping_dict_typology, inplace=True) - os.remove(path_to_input_file_without_db_3(scenario, 'architecture')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 architecture.dbf has been migrated to CEA-4 format.') - - if 'indoor_comfort' not in list_missing_files_dbf_building_properties: - list_missing_columns_indoor_comfort = verify_dbf(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) - if list_missing_columns_indoor_comfort: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.dbf: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) - else: - if 'Name' not in list_missing_columns_indoor_comfort: - list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') - if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - indoor_comfort_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) - indoor_comfort_df.rename(columns=columns_mapping_dict_typology, inplace=True) - os.remove(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 indoor_comfort.dbf has been migrated to CEA-4 format.') - - if 'internal_loads' not in list_missing_files_dbf_building_properties: - list_missing_columns_internal_loads = verify_dbf(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) - if list_missing_columns_internal_loads: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.dbf: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) - else: - if 'Name' not in list_missing_columns_internal_loads: - list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') - if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - internal_loads_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'internal_loads')) - internal_loads_df.rename(columns=columns_mapping_dict_typology, inplace=True) - os.remove(path_to_input_file_without_db_3(scenario, 'internal_loads')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 internal_loads.dbf has been migrated to CEA-4 format.') - - if 'supply_systems' not in list_missing_files_dbf_building_properties: - list_missing_columns_supply_systems = verify_dbf(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) - if list_missing_columns_supply_systems: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.dbf: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) - else: - if 'Name' not in list_missing_columns_supply_systems: - list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') - if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - supply_systems_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'supply_systems')) - supply_systems_df.rename(columns=columns_mapping_dict_typology, inplace=True) - os.remove(path_to_input_file_without_db_3(scenario, 'supply_systems')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 supply_systems.dbf has been migrated to CEA-4 format.') - - if 'typology' not in list_missing_files_dbf_building_properties: - os.remove(path_to_input_file_without_db_3(scenario, 'typology')) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 typology.dbf has been removed as it is no longer needed by CEA-4.') - - #3. about the Database - + # Print: Start + div_len = 37 - len(scenario_name) + print('-' * 50) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + #0. verify if everything is already in the correct format for CEA-4 + dict_missing = cea4_verify(scenario) + if all(not value for value in dict_missing.values()): + print("✓" * 3) + print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + 'input building-geometries ShapeFiles: [zone and surroundings], ' + 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), + '.' + ) + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + + else: + # Verify missing files for CEA-3 format + list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') + list_missing_files_dbf_building_properties = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) + if list_missing_files_dbf_building_properties: + print('Ensure .dbf file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_dbf_building_properties)) + + # Verify missing attributes/columns for CEA-4 format + list_missing_attributes_zone_4 = dict_missing.get('zone') + list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') + + #1. about zone.shp and surroundings.shp + if 'zone' not in list_missing_files_shp_building_geometry: + list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) + if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') + zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) + zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) + if 'typology' not in list_missing_files_dbf_building_properties: + list_missing_attributes_typology_3 = verify_dbf_3(scenario, 'typology', COLUMNS_TYPOLOGY_3) + if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') + typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) + typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') + zone_df_4.drop(columns=['Name'], inplace=True) + zone_df_4 = zone_df_4[COLUMNS_ZONE_4] + replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) + print('CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + else: + raise ValueError('typology.shp exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3) + ) + elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') + else: + raise ValueError('zone.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_zone_3}.'.format(list_missing_attributes_zone_3=list_missing_attributes_zone_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_zone_4}.'.format(list_missing_attributes_zone_4=list_missing_attributes_zone_4) + ) + + if 'surroundings' not in list_missing_files_shp_building_geometry: + list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) + if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') + surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) + surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) + replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) + print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 surroundings.shp has been migrated to CEA-4 format.') + + elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') + else: + raise ValueError('surroundings.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_surroundings_3}.'.format(list_missing_attributes_surroundings_3=list_missing_attributes_surroundings_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_surroundings_4}.'.format(list_missing_attributes_surroundings_4=list_missing_attributes_surroundings_4) + ) + + #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files + if 'air_conditioning' not in list_missing_files_dbf_building_properties: + list_missing_columns_air_conditioning = verify_dbf_3(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) + if list_missing_columns_air_conditioning: + print('Ensure column(s) are present in the air_conditioning.dbf: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + else: + if 'Name' not in list_missing_columns_air_conditioning: + list_names_duplicated = verify_name_duplicates_3(scenario, 'air_conditioning') + if list_names_duplicated: + print('Ensure name(s) are unique in air_conditioning.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + air_conditioning_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'air_conditioning')) + air_conditioning_df.rename(columns=columns_mapping_dict_name, inplace=True) + air_conditioning_df.rename(columns=columns_mapping_dict_typology, inplace=True) + air_conditioning_df.to_csv(path_to_input_file_without_db_4(scenario, 'air_conditioning'), index=False) + os.remove(path_to_input_file_without_db_3(scenario, 'air_conditioning')) + print('air_conditioning.dbf has been migrated from CEA-3 to CEA-4 format.') + + if 'architecture' not in list_missing_files_dbf_building_properties: + list_missing_columns_architecture = verify_dbf_3(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) + if list_missing_columns_architecture: + print('Ensure column(s) are present in the architecture.dbf: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + else: + if 'Name' not in list_missing_columns_architecture: + list_names_duplicated = verify_name_duplicates_3(scenario, 'architecture') + if list_names_duplicated: + print('Ensure name(s) are unique in architecture.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + architecture_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'architecture')) + architecture_df.rename(columns=columns_mapping_dict_name, inplace=True) + architecture_df.rename(columns=columns_mapping_dict_typology, inplace=True) + architecture_df.to_csv(path_to_input_file_without_db_4(scenario, 'architecture'), index=False) + os.remove(path_to_input_file_without_db_3(scenario, 'architecture')) + print('architecture.dbf has been migrated from CEA-3 to CEA-4 format.') + + if 'indoor_comfort' not in list_missing_files_dbf_building_properties: + list_missing_columns_indoor_comfort = verify_dbf_3(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + if list_missing_columns_indoor_comfort: + print('Ensure column(s) are present in the air_conditioning.dbf: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + else: + if 'Name' not in list_missing_columns_indoor_comfort: + list_names_duplicated = verify_name_duplicates_3(scenario, 'indoor_comfort') + if list_names_duplicated: + print('Ensure name(s) are unique in indoor_comfort.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + indoor_comfort_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) + indoor_comfort_df.rename(columns=columns_mapping_dict_name, inplace=True) + indoor_comfort_df.rename(columns=columns_mapping_dict_typology, inplace=True) + indoor_comfort_df.to_csv(path_to_input_file_without_db_4(scenario, 'indoor_comfort'), index=False) + os.remove(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) + print('indoor_comfort.dbf has been migrated from CEA-3 to CEA-4 format.') + + if 'internal_loads' not in list_missing_files_dbf_building_properties: + list_missing_columns_internal_loads = verify_dbf_3(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + if list_missing_columns_internal_loads: + print('Ensure column(s) are present in the internal_loads.dbf: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + else: + if 'Name' not in list_missing_columns_internal_loads: + list_names_duplicated = verify_name_duplicates_3(scenario, 'internal_loads') + if list_names_duplicated: + print('Ensure name(s) are unique in internal_loads.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + internal_loads_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'internal_loads')) + internal_loads_df.rename(columns=columns_mapping_dict_name, inplace=True) + internal_loads_df.rename(columns=columns_mapping_dict_typology, inplace=True) + internal_loads_df.to_csv(path_to_input_file_without_db_4(scenario, 'internal_loads'), index=False) + os.remove(path_to_input_file_without_db_3(scenario, 'internal_loads')) + print('internal_loads.dbf has been migrated from CEA-3 to CEA-4 format.') + + if 'supply_systems' not in list_missing_files_dbf_building_properties: + list_missing_columns_supply_systems = verify_dbf_3(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + if list_missing_columns_supply_systems: + print('Ensure column(s) are present in the supply_systems.dbf: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + else: + if 'Name' not in list_missing_columns_supply_systems: + list_names_duplicated = verify_name_duplicates_3(scenario, 'supply_systems') + if list_names_duplicated: + print('Ensure name(s) are unique in supply_systems.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + else: + supply_systems_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'supply_systems')) + supply_systems_df.rename(columns=columns_mapping_dict_name, inplace=True) + supply_systems_df.rename(columns=columns_mapping_dict_typology, inplace=True) + supply_systems_df.to_csv(path_to_input_file_without_db_4(scenario, 'supply_systems'), index=False) + os.remove(path_to_input_file_without_db_3(scenario, 'supply_systems')) + print('supply_systems.dbf has been migrated from CEA-3 to CEA-4 format.') + + if 'typology' not in list_missing_files_dbf_building_properties: + os.remove(path_to_input_file_without_db_3(scenario, 'typology')) + print('typology.dbf has been removed as it is no longer needed by CEA-4.') + + #3. about the Database + + + # Print: End + print("-" * 50) ## -------------------------------------------------------------------------------------------------------------------- ## Main function @@ -328,12 +390,21 @@ def main(config): t0 = time.perf_counter() assert os.path.exists(config.general.project), 'input file not found: %s' % config.project - # Execute the verification - migrate_cea3_to_cea4(scenario=config.scenario) + scenario = config.scenario + scenario_name = os.path.basename(scenario) # Execute the verification - print("A final step to verify if all the data is in the correct format for CEA-4.") - cea4_verify(scenario=config.scenario) + migrate_cea3_to_cea4(scenario) + + # Execute the verification again + dict_missing = cea4_verify(scenario) + + # Print the verification results + print_verification_results_4(scenario_name, dict_missing) + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 50) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index fbadd9d9ad..00298e5d75 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -21,6 +21,26 @@ __status__ = "Production" +SHAPEFILES = ['zone', 'surroundings'] +COLUMNS_ZONE_4 = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', + 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] +COLUMNS_SURROUNDINGS_4 = ['name', 'height_ag', 'floors_ag'] +CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] +COLUMNS_AIR_CONDITIONING_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_4 = ['name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_4 = ['name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_4 = ['name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] + ## -------------------------------------------------------------------------------------------------------------------- ## The paths to the input files for CEA-4 ## -------------------------------------------------------------------------------------------------------------------- @@ -93,7 +113,7 @@ def verify_shp(scenario, item, required_attributes): return missing_attributes -def verify_csv(scenario, item, required_columns): +def verify_csv_4(scenario, item, required_columns): """ Verify if a CSV file contains all required columns. @@ -146,7 +166,7 @@ def verify_file_exists_4(scenario, items): return list_missing_files -def verify_name_duplicates(scenario, item): +def verify_name_duplicates_4(scenario, item): """ Verify if there are duplicate names in the 'name' column of a .csv or .shp file. @@ -179,6 +199,32 @@ def verify_name_duplicates(scenario, item): return list_names_duplicated +def print_verification_results_4(scenario_name, dict_missing): + + list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') + list_missing_files_csv_building_properties = dict_missing.get('building-properties') + + if all(not value for value in dict_missing.values()): + print("✓" * 3) + print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + 'input building-geometries ShapeFiles: [zone and surroundings], ' + 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), + '.' + ) + else: + print("!" * 3) + print('All or some of input data files/columns are missing or incompatible with the current version of CEA-4 for Scenario: {scenario}. '.format(scenario=scenario_name), + 'If you are migrating your input data from CEA-3 to CEA-4, set the toggle `migrate_from_cea_3` to `True`. ' + 'If you manually prepared the input data, check the log for missing files and/or incompatible columns.' + ) + + if list_missing_files_shp_building_geometry: + print('Ensure .shp file(s) are present in the building-geometry folder: {missing_files_shp_building_geometry}'.format(missing_files_shp_building_geometry=list_missing_files_shp_building_geometry)) + + if list_missing_files_csv_building_properties: + print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + + ## -------------------------------------------------------------------------------------------------------------------- ## Unique traits for the CEA-4 format ## -------------------------------------------------------------------------------------------------------------------- @@ -189,47 +235,28 @@ def cea4_verify(scenario): scenario_name = os.path.basename(scenario) #1. about zone.shp and surroundings.shp - SHAPEFILES = ['zone', 'surroundings'] - COLUMNS_ZONE_4 = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', - 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] - COLUMNS_SURROUNDINGS_4 = ['name', 'height_ag', 'floors_ag'] - list_missing_attributes_zone = [] list_missing_attributes_surroundings = [] - list_missing_files_shp_building_geometry = verify_file_exists_4(scenario, SHAPEFILES) - if list_missing_files_shp_building_geometry: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .shp file(s) are present in the building-geometries folder: {missing_files_shp_building_geometries}'.format(missing_files_shp_building_geometries=list_missing_files_shp_building_geometry)) + if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) - if 'name' in list_missing_attributes_zone: - list_names_duplicated = verify_name_duplicates(scenario, 'zone') + print('Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + if 'name' not in list_missing_attributes_zone: + list_names_duplicated = verify_name_duplicates_4(scenario, 'zone') if list_names_duplicated: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'surroundings' not in list_missing_files_shp_building_geometry: list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) if list_missing_attributes_surroundings: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + print('Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + if 'name' not in list_missing_attributes_surroundings: + list_names_duplicated = verify_name_duplicates_4(scenario, 'surroundings') + if list_names_duplicated: + print('Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #2. about .csv files under the "inputs/building-properties" folder - CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] - COLUMNS_AIR_CONDITIONING_4 = ['name', - 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', - 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] - COLUMNS_ARCHITECTURE_4 = ['name', - 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', - 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', - 'type_win', 'type_shade'] - COLUMNS_INDOOR_COMFORT_4 = ['name', - 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] - COLUMNS_INTERNAL_LOADS_4 = ['name', - 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', - 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] - COLUMNS_SUPPLY_SYSTEMS_4 = ['name', - 'type_cs', 'type_hs', 'type_dhw', 'type_el'] - list_missing_columns_air_conditioning = [] list_missing_columns_architecture = [] list_missing_columns_indoor_comfort = [] @@ -237,68 +264,65 @@ def cea4_verify(scenario): list_missing_columns_supply_systems = [] list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) - if list_missing_files_csv_building_properties: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) - if 'air_conditioning' not in list_missing_files_csv_building_properties: - list_missing_columns_air_conditioning = verify_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) + list_missing_columns_air_conditioning = verify_csv_4(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) if list_missing_columns_air_conditioning: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + print('Ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) else: if 'name' not in list_missing_columns_air_conditioning: - list_names_duplicated = verify_name_duplicates(scenario, 'air_conditioning') + list_names_duplicated = verify_name_duplicates_4(scenario, 'air_conditioning') if list_names_duplicated: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'architecture' not in list_missing_files_csv_building_properties: - list_missing_columns_architecture = verify_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) + list_missing_columns_architecture = verify_csv_4(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) if list_missing_columns_architecture: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + print('Ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) else: if 'name' not in list_missing_columns_architecture: - list_names_duplicated = verify_name_duplicates(scenario, 'architecture') + list_names_duplicated = verify_name_duplicates_4(scenario, 'architecture') if list_names_duplicated: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'indoor_comfort' not in list_missing_files_csv_building_properties: - list_missing_columns_indoor_comfort = verify_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) + list_missing_columns_indoor_comfort = verify_csv_4(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) if list_missing_columns_indoor_comfort: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + print('Ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) else: if 'name' not in list_missing_columns_indoor_comfort: - list_names_duplicated = verify_name_duplicates(scenario, 'indoor_comfort') + list_names_duplicated = verify_name_duplicates_4(scenario, 'indoor_comfort') if list_names_duplicated: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'internal_loads' not in list_missing_files_csv_building_properties: - list_missing_columns_internal_loads = verify_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) + list_missing_columns_internal_loads = verify_csv_4(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) if list_missing_columns_internal_loads: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + print('Ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) else: if 'name' not in list_missing_columns_internal_loads: - list_names_duplicated = verify_name_duplicates(scenario, 'internal_loads') + list_names_duplicated = verify_name_duplicates_4(scenario, 'internal_loads') if list_names_duplicated: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'supply_systems' not in list_missing_files_csv_building_properties: - list_missing_columns_supply_systems = verify_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) + list_missing_columns_supply_systems = verify_csv_4(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) if list_missing_columns_supply_systems: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + print('Ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) else: if 'name' not in list_missing_columns_supply_systems: - list_names_duplicated = verify_name_duplicates(scenario, 'supply_systems') + list_names_duplicated = verify_name_duplicates_4(scenario, 'supply_systems') if list_names_duplicated: - print('For Scenario: {scenario},'.format(scenario=scenario_name), 'ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('Ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #3. verify if terrain.tif, weather.epw and streets.shp exist list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) if list_missing_files_terrain: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + print('Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) if list_missing_files_weather: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + print('Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) if list_missing_files_streets: - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') + print('Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') #4. verify the DB under the "inputs/technology/" folder list_missing_files_db = [] @@ -320,13 +344,6 @@ def cea4_verify(scenario): 'db': list_missing_files_db } - if all(not value for value in dict_missing.values()): - print('For Scenario: {scenario},'.format(scenario=scenario_name), - 'input building-geometries ShapeFiles: [zone and surroundings], ' - 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), - 'are all verified as present and compatible with the current version of CEA-4.' - ) - return dict_missing @@ -340,11 +357,27 @@ def main(config): t0 = time.perf_counter() assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + # Get the scenario name + scenario = config.scenario + scenario_name = os.path.basename(scenario) + + # Print: Start + div_len = 37 - len(scenario_name) + print('-' * 50) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + # Execute the verification - cea4_verify(scenario=config.scenario) + dict_missing = cea4_verify(scenario) + + # Print the results + print_verification_results_4(scenario_name, dict_missing) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 50) print('The entire process of CEA-4 format verification is now completed - time elapsed: %d.2 seconds' % time_elapsed) if __name__ == '__main__': diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index fcade5d5ed..80b645923d 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -9,6 +9,8 @@ import sys import cea.config import time +from cea.datamanagement.format_helper.cea4_verify import cea4_verify +from cea.datamanagement.format_helper.cea4_migrate import migrate_cea3_to_cea4 __author__ = "Zhongming Shi" __copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" @@ -34,11 +36,13 @@ def exec_cea_format_helper(config, cea_scenario): # auto-migrate from CEA-3 to CEA-4 bool_migrate = config.format_helper.migrate_from_cea_3 if bool_migrate: - subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True, - capture_output=True) + # subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True,capture_output=True) + cea4_verify(cea_scenario) + else: - subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, - capture_output=True) + # subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) + migrate_cea3_to_cea4(cea_scenario) + cea4_verify(cea_scenario) ## -------------------------------------------------------------------------------------------------------------------- @@ -60,9 +64,7 @@ def main(config): assert os.path.exists(config.general.project), 'input file not found: %s' % config.project project_path = config.general.project - scenario_name = config.general.scenario_name scenarios_list = config.format_helper.scenarios_to_verify_and_migrate - bool_migrate = config.format_helper.migrate_from_cea_3 # Loop over one or all selected scenarios under the project for scenario in scenarios_list: @@ -71,23 +73,12 @@ def main(config): continue cea_scenario = os.path.join(project_path, scenario) - if bool_migrate: - print(f'Executing CEA input data verification and migration on {cea_scenario}.') - else: - print(f'Executing CEA input data verification on {cea_scenario}.') - - try: - # executing CEA commands - exec_cea_format_helper(config, cea_scenario) - except subprocess.CalledProcessError as e: - print(f"CEA input data verification (and migration) for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") - err_msg = e.stderr - if err_msg is not None: - print(err_msg.decode()) - raise e + # executing CEA commands + exec_cea_format_helper(config, cea_scenario) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 + print('+' * 50) print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) From 3c58f0a30dc25bb05a02716ede165432be683f46 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Sun, 12 Jan 2025 23:59:36 +0100 Subject: [PATCH 16/29] it seems everything is in place now before the batch processing --- .../format_helper/cea4_migrate.py | 24 +++--- .../format_helper/cea4_verify.py | 82 +++++++++++-------- 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 7f436806e4..849b8c5af8 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -209,19 +209,19 @@ def migrate_cea3_to_cea4(scenario): scenario_name = os.path.basename(scenario) # Print: Start - div_len = 37 - len(scenario_name) - print('-' * 50) + div_len = 47 - len(scenario_name) + print('-' * 60) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) #0. verify if everything is already in the correct format for CEA-4 dict_missing = cea4_verify(scenario) if all(not value for value in dict_missing.values()): - print("✓" * 3) - print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), - 'input building-geometries ShapeFiles: [zone and surroundings], ' - 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), - '.' - ) + pass + # print("✓" * 3) + # print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + # 'input building-geometries ShapeFiles: [zone and surroundings], ' + # 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), + # ) # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) else: @@ -252,7 +252,7 @@ def migrate_cea3_to_cea4(scenario): zone_df_4.drop(columns=['Name'], inplace=True) zone_df_4 = zone_df_4[COLUMNS_ZONE_4] replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) - print('CEA-3 zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + print('zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') else: raise ValueError('typology.shp exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3) @@ -273,7 +273,7 @@ def migrate_cea3_to_cea4(scenario): surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) - print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'CEA-3 surroundings.shp has been migrated to CEA-4 format.') + print('surroundings.shp has been migrated to CEA-4 format.') elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: pass @@ -378,7 +378,7 @@ def migrate_cea3_to_cea4(scenario): # Print: End - print("-" * 50) + print("-" * 60) ## -------------------------------------------------------------------------------------------------------------------- ## Main function @@ -404,7 +404,7 @@ def main(config): # Print: End # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) - print('+' * 50) + print('+' * 60) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 00298e5d75..86f334af18 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -208,28 +208,21 @@ def print_verification_results_4(scenario_name, dict_missing): print("✓" * 3) print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), 'input building-geometries ShapeFiles: [zone and surroundings], ' - 'input building-properties .csv files: {csv_building_properties}'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), - '.' + 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4) ) else: print("!" * 3) print('All or some of input data files/columns are missing or incompatible with the current version of CEA-4 for Scenario: {scenario}. '.format(scenario=scenario_name), - 'If you are migrating your input data from CEA-3 to CEA-4, set the toggle `migrate_from_cea_3` to `True`. ' - 'If you manually prepared the input data, check the log for missing files and/or incompatible columns.' + 'If you are migrating your input data from CEA-3 to CEA-4 format, set the toggle `migrate_from_cea_3` to `True` and run the script again. ' + 'If you manually prepared the input data, check the log for missing files and/or incompatible columns. Modify your input data according to the log above.' ) - if list_missing_files_shp_building_geometry: - print('Ensure .shp file(s) are present in the building-geometry folder: {missing_files_shp_building_geometry}'.format(missing_files_shp_building_geometry=list_missing_files_shp_building_geometry)) - - if list_missing_files_csv_building_properties: - print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) - ## -------------------------------------------------------------------------------------------------------------------- ## Unique traits for the CEA-4 format ## -------------------------------------------------------------------------------------------------------------------- -def cea4_verify(scenario): +def cea4_verify(scenario, print_results=False): #0. get the scenario name scenario_name = os.path.basename(scenario) @@ -242,19 +235,23 @@ def cea4_verify(scenario): if 'zone' not in list_missing_files_shp_building_geometry: list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: - print('Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + if print_results: + print('Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) if 'name' not in list_missing_attributes_zone: list_names_duplicated = verify_name_duplicates_4(scenario, 'zone') if list_names_duplicated: - print('Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'surroundings' not in list_missing_files_shp_building_geometry: list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) if list_missing_attributes_surroundings: - print('Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + if print_results: + print('Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) if 'name' not in list_missing_attributes_surroundings: list_names_duplicated = verify_name_duplicates_4(scenario, 'surroundings') if list_names_duplicated: - print('Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #2. about .csv files under the "inputs/building-properties" folder list_missing_columns_air_conditioning = [] @@ -264,65 +261,82 @@ def cea4_verify(scenario): list_missing_columns_supply_systems = [] list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) + if list_missing_files_csv_building_properties: + if print_results: + print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + if 'air_conditioning' not in list_missing_files_csv_building_properties: list_missing_columns_air_conditioning = verify_csv_4(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) if list_missing_columns_air_conditioning: - print('Ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) + if print_results: + print('Ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) else: if 'name' not in list_missing_columns_air_conditioning: list_names_duplicated = verify_name_duplicates_4(scenario, 'air_conditioning') if list_names_duplicated: - print('Ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'architecture' not in list_missing_files_csv_building_properties: list_missing_columns_architecture = verify_csv_4(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) if list_missing_columns_architecture: - print('Ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) + if print_results: + print('Ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) else: if 'name' not in list_missing_columns_architecture: list_names_duplicated = verify_name_duplicates_4(scenario, 'architecture') if list_names_duplicated: - print('Ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'indoor_comfort' not in list_missing_files_csv_building_properties: list_missing_columns_indoor_comfort = verify_csv_4(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) if list_missing_columns_indoor_comfort: - print('Ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) + if print_results: + print('Ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) else: if 'name' not in list_missing_columns_indoor_comfort: list_names_duplicated = verify_name_duplicates_4(scenario, 'indoor_comfort') if list_names_duplicated: - print('Ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'internal_loads' not in list_missing_files_csv_building_properties: list_missing_columns_internal_loads = verify_csv_4(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) if list_missing_columns_internal_loads: - print('Ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) + if print_results: + print('Ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) else: if 'name' not in list_missing_columns_internal_loads: list_names_duplicated = verify_name_duplicates_4(scenario, 'internal_loads') if list_names_duplicated: - print('Ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'supply_systems' not in list_missing_files_csv_building_properties: list_missing_columns_supply_systems = verify_csv_4(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) if list_missing_columns_supply_systems: - print('Ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) + if print_results: + print('Ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) else: if 'name' not in list_missing_columns_supply_systems: list_names_duplicated = verify_name_duplicates_4(scenario, 'supply_systems') if list_names_duplicated: - print('Ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if print_results: + print('Ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #3. verify if terrain.tif, weather.epw and streets.shp exist list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) if list_missing_files_terrain: - print('Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + if print_results: + print('Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) if list_missing_files_weather: - print('Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + if print_results: + print('Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) if list_missing_files_streets: - print('Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') + if print_results: + print('Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') #4. verify the DB under the "inputs/technology/" folder list_missing_files_db = [] @@ -344,6 +358,10 @@ def cea4_verify(scenario): 'db': list_missing_files_db } + # Print: End + if print_results: + print("-" * 60) + return dict_missing @@ -362,12 +380,12 @@ def main(config): scenario_name = os.path.basename(scenario) # Print: Start - div_len = 37 - len(scenario_name) - print('-' * 50) + div_len = 47 - len(scenario_name) + print('-' * 60) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) # Execute the verification - dict_missing = cea4_verify(scenario) + dict_missing = cea4_verify(scenario, print_results=True) # Print the results print_verification_results_4(scenario_name, dict_missing) @@ -377,7 +395,7 @@ def main(config): # Print: End # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) - print('+' * 50) + print('+' * 60) print('The entire process of CEA-4 format verification is now completed - time elapsed: %d.2 seconds' % time_elapsed) if __name__ == '__main__': From 4392dec31fa23683c4a3653702e617a0127db65d Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 00:19:23 +0100 Subject: [PATCH 17/29] batch processing is ready DB to be included in future PR --- .../format_helper/cea4_migrate.py | 13 ++++------- .../format_helper/cea4_verify.py | 2 +- .../format_helper/format_helper.py | 23 +++++++++++++++---- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 849b8c5af8..9e429f0cb0 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -205,14 +205,6 @@ def verify_file_exists_3(scenario, items): def migrate_cea3_to_cea4(scenario): - # Get the scenario name - scenario_name = os.path.basename(scenario) - - # Print: Start - div_len = 47 - len(scenario_name) - print('-' * 60) - print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) - #0. verify if everything is already in the correct format for CEA-4 dict_missing = cea4_verify(scenario) if all(not value for value in dict_missing.values()): @@ -393,6 +385,11 @@ def main(config): scenario = config.scenario scenario_name = os.path.basename(scenario) + # Print: Start + div_len = 47 - len(scenario_name) + print('-' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + # Execute the verification migrate_cea3_to_cea4(scenario) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 86f334af18..db0b5c34f5 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -381,7 +381,7 @@ def main(config): # Print: Start div_len = 47 - len(scenario_name) - print('-' * 60) + print('+' * 60) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) # Execute the verification diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 80b645923d..8a147a669b 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -9,7 +9,7 @@ import sys import cea.config import time -from cea.datamanagement.format_helper.cea4_verify import cea4_verify +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, print_verification_results_4 from cea.datamanagement.format_helper.cea4_migrate import migrate_cea3_to_cea4 __author__ = "Zhongming Shi" @@ -35,14 +35,19 @@ def exec_cea_format_helper(config, cea_scenario): # auto-migrate from CEA-3 to CEA-4 bool_migrate = config.format_helper.migrate_from_cea_3 - if bool_migrate: + scenario_name = os.path.basename(cea_scenario) + + if not bool_migrate: # subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True,capture_output=True) - cea4_verify(cea_scenario) + dict_missing = cea4_verify(cea_scenario, print_results=True) + print_verification_results_4(scenario_name, dict_missing) else: # subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) migrate_cea3_to_cea4(cea_scenario) - cea4_verify(cea_scenario) + dict_missing = cea4_verify(cea_scenario) + print_verification_results_4(scenario_name, dict_missing) + ## -------------------------------------------------------------------------------------------------------------------- @@ -66,19 +71,27 @@ def main(config): project_path = config.general.project scenarios_list = config.format_helper.scenarios_to_verify_and_migrate + print('+' * 60) + print('Format Helper is batch-processing the data verification and migration for Scenarios: {scenarios_list}.'.format(scenarios_list=scenarios_list)) + # Loop over one or all selected scenarios under the project for scenario in scenarios_list: # Ignore hidden directories if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): continue + # Print: Start + div_len = 47 - len(scenario) + print('+' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario) + "-" * div_len) + cea_scenario = os.path.join(project_path, scenario) # executing CEA commands exec_cea_format_helper(config, cea_scenario) # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 - print('+' * 50) + print('+' * 60) print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) From c6d2623fe65c78c67249d47910ab40a1b785db9b Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 00:25:45 +0100 Subject: [PATCH 18/29] ruff test update --- cea/datamanagement/format_helper/cea4_migrate.py | 6 +++--- cea/datamanagement/format_helper/cea4_verify.py | 1 - cea/datamanagement/format_helper/format_helper.py | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 9e429f0cb0..8767152b1f 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -3,13 +3,13 @@ """ -import cea.inputlocator + import os import cea.config import time import pandas as pd import geopandas as gpd -import sys + __author__ = "Zhongming Shi" @@ -21,7 +21,7 @@ __email__ = "cea@arch.ethz.ch" __status__ = "Production" -from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, CSV_BUILDING_PROPERTIES_4, \ +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, \ COLUMNS_ZONE_4, print_verification_results_4, path_to_input_file_without_db_4 from cea.utilities.dbf import dbf_to_dataframe diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index db0b5c34f5..447ba66698 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -3,7 +3,6 @@ """ -import cea.inputlocator import os import cea.config import time diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 8a147a669b..f64ada463b 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -5,7 +5,6 @@ import cea.config import os -import subprocess import sys import cea.config import time From abd8a112bee2d685fbd97c31a6a40df19269f89b Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 08:35:57 +0100 Subject: [PATCH 19/29] ruff updates --- .../format_helper/cea4_migrate.py | 108 +++++------------- .../format_helper/cea4_verify.py | 8 +- .../format_helper/format_helper.py | 2 +- 3 files changed, 32 insertions(+), 86 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 8767152b1f..f7f0f5c274 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -199,6 +199,31 @@ def verify_file_exists_3(scenario, items): return list_missing_files +def migrate_dbf_to_csv(scenario, item, required_columns): + """ + Migrate a DBF file to CSV format with column renaming. + + Args: + scenario: The scenario path + item: The item name (e.g., 'air_conditioning') + required_columns: List of required columns + """ + list_missing_columns = verify_dbf_3(scenario, item, required_columns) + if list_missing_columns: + print(f'Ensure column(s) are present in the {item}.dbf: {list_missing_columns}') + else: + if 'Name' not in list_missing_columns: + list_names_duplicated = verify_name_duplicates_3(scenario, item) + if list_names_duplicated: + print(f'Ensure name(s) are unique in {item}.dbf: {list_names_duplicated} is duplicated.') + else: + df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, item)) + df.rename(columns=columns_mapping_dict_name, inplace=True) + df.rename(columns=columns_mapping_dict_typology, inplace=True) + df.to_csv(path_to_input_file_without_db_4(scenario, item), index=False) + os.remove(path_to_input_file_without_db_3(scenario, item)) + print(f'{item}.dbf has been migrated from CEA-3 to CEA-4 format.') + ## -------------------------------------------------------------------------------------------------------------------- ## Migrate to CEA-4 format from CEA-3 format ## -------------------------------------------------------------------------------------------------------------------- @@ -278,89 +303,16 @@ def migrate_cea3_to_cea4(scenario): #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files if 'air_conditioning' not in list_missing_files_dbf_building_properties: - list_missing_columns_air_conditioning = verify_dbf_3(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) - if list_missing_columns_air_conditioning: - print('Ensure column(s) are present in the air_conditioning.dbf: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) - else: - if 'Name' not in list_missing_columns_air_conditioning: - list_names_duplicated = verify_name_duplicates_3(scenario, 'air_conditioning') - if list_names_duplicated: - print('Ensure name(s) are unique in air_conditioning.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - air_conditioning_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'air_conditioning')) - air_conditioning_df.rename(columns=columns_mapping_dict_name, inplace=True) - air_conditioning_df.rename(columns=columns_mapping_dict_typology, inplace=True) - air_conditioning_df.to_csv(path_to_input_file_without_db_4(scenario, 'air_conditioning'), index=False) - os.remove(path_to_input_file_without_db_3(scenario, 'air_conditioning')) - print('air_conditioning.dbf has been migrated from CEA-3 to CEA-4 format.') + migrate_dbf_to_csv('air_conditioning', COLUMNS_AIR_CONDITIONING_3) if 'architecture' not in list_missing_files_dbf_building_properties: - list_missing_columns_architecture = verify_dbf_3(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) - if list_missing_columns_architecture: - print('Ensure column(s) are present in the architecture.dbf: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) - else: - if 'Name' not in list_missing_columns_architecture: - list_names_duplicated = verify_name_duplicates_3(scenario, 'architecture') - if list_names_duplicated: - print('Ensure name(s) are unique in architecture.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - architecture_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'architecture')) - architecture_df.rename(columns=columns_mapping_dict_name, inplace=True) - architecture_df.rename(columns=columns_mapping_dict_typology, inplace=True) - architecture_df.to_csv(path_to_input_file_without_db_4(scenario, 'architecture'), index=False) - os.remove(path_to_input_file_without_db_3(scenario, 'architecture')) - print('architecture.dbf has been migrated from CEA-3 to CEA-4 format.') + migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) if 'indoor_comfort' not in list_missing_files_dbf_building_properties: - list_missing_columns_indoor_comfort = verify_dbf_3(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) - if list_missing_columns_indoor_comfort: - print('Ensure column(s) are present in the air_conditioning.dbf: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) - else: - if 'Name' not in list_missing_columns_indoor_comfort: - list_names_duplicated = verify_name_duplicates_3(scenario, 'indoor_comfort') - if list_names_duplicated: - print('Ensure name(s) are unique in indoor_comfort.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - indoor_comfort_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) - indoor_comfort_df.rename(columns=columns_mapping_dict_name, inplace=True) - indoor_comfort_df.rename(columns=columns_mapping_dict_typology, inplace=True) - indoor_comfort_df.to_csv(path_to_input_file_without_db_4(scenario, 'indoor_comfort'), index=False) - os.remove(path_to_input_file_without_db_3(scenario, 'indoor_comfort')) - print('indoor_comfort.dbf has been migrated from CEA-3 to CEA-4 format.') - - if 'internal_loads' not in list_missing_files_dbf_building_properties: - list_missing_columns_internal_loads = verify_dbf_3(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) - if list_missing_columns_internal_loads: - print('Ensure column(s) are present in the internal_loads.dbf: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) - else: - if 'Name' not in list_missing_columns_internal_loads: - list_names_duplicated = verify_name_duplicates_3(scenario, 'internal_loads') - if list_names_duplicated: - print('Ensure name(s) are unique in internal_loads.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - internal_loads_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'internal_loads')) - internal_loads_df.rename(columns=columns_mapping_dict_name, inplace=True) - internal_loads_df.rename(columns=columns_mapping_dict_typology, inplace=True) - internal_loads_df.to_csv(path_to_input_file_without_db_4(scenario, 'internal_loads'), index=False) - os.remove(path_to_input_file_without_db_3(scenario, 'internal_loads')) - print('internal_loads.dbf has been migrated from CEA-3 to CEA-4 format.') + migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) if 'supply_systems' not in list_missing_files_dbf_building_properties: - list_missing_columns_supply_systems = verify_dbf_3(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) - if list_missing_columns_supply_systems: - print('Ensure column(s) are present in the supply_systems.dbf: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) - else: - if 'Name' not in list_missing_columns_supply_systems: - list_names_duplicated = verify_name_duplicates_3(scenario, 'supply_systems') - if list_names_duplicated: - print('Ensure name(s) are unique in supply_systems.dbf: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - else: - supply_systems_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'supply_systems')) - supply_systems_df.rename(columns=columns_mapping_dict_name, inplace=True) - supply_systems_df.rename(columns=columns_mapping_dict_typology, inplace=True) - supply_systems_df.to_csv(path_to_input_file_without_db_4(scenario, 'supply_systems'), index=False) - os.remove(path_to_input_file_without_db_3(scenario, 'supply_systems')) - print('supply_systems.dbf has been migrated from CEA-3 to CEA-4 format.') + migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) if 'typology' not in list_missing_files_dbf_building_properties: os.remove(path_to_input_file_without_db_3(scenario, 'typology')) @@ -405,7 +357,7 @@ def main(config): # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 - print('The entire process of data migration from CEA-3 to CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) + print('The entire process of data migration from CEA-3 to CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) if __name__ == '__main__': main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 447ba66698..019e732b44 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -200,9 +200,6 @@ def verify_name_duplicates_4(scenario, item): def print_verification_results_4(scenario_name, dict_missing): - list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') - list_missing_files_csv_building_properties = dict_missing.get('building-properties') - if all(not value for value in dict_missing.values()): print("✓" * 3) print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), @@ -223,9 +220,6 @@ def print_verification_results_4(scenario_name, dict_missing): def cea4_verify(scenario, print_results=False): - #0. get the scenario name - scenario_name = os.path.basename(scenario) - #1. about zone.shp and surroundings.shp list_missing_attributes_zone = [] list_missing_attributes_surroundings = [] @@ -395,7 +389,7 @@ def main(config): # Print: End # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) print('+' * 60) - print('The entire process of CEA-4 format verification is now completed - time elapsed: %d.2 seconds' % time_elapsed) + print('The entire process of CEA-4 format verification is now completed - time elapsed: %.2f seconds' % time_elapsed) if __name__ == '__main__': main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index f64ada463b..08bf7303f5 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -91,7 +91,7 @@ def main(config): # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 print('+' * 60) - print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %d.2 seconds' % time_elapsed) + print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) if __name__ == '__main__': From c8ae6393bc76d9639a2b5156405056fbd4a79d6d Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 08:38:33 +0100 Subject: [PATCH 20/29] ruff 3 --- cea/datamanagement/format_helper/format_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 08bf7303f5..c02dd5b4cb 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -3,7 +3,6 @@ """ -import cea.config import os import sys import cea.config From d508dcd7da84e26bfb685fa603d1c83da3a1e297 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 09:07:08 +0100 Subject: [PATCH 21/29] ruff 4 --- .../format_helper/cea4_migrate.py | 12 ++- .../format_helper/cea4_verify.py | 84 ++++++++----------- 2 files changed, 42 insertions(+), 54 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index f7f0f5c274..93a89fc99d 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -303,7 +303,7 @@ def migrate_cea3_to_cea4(scenario): #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files if 'air_conditioning' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv('air_conditioning', COLUMNS_AIR_CONDITIONING_3) + migrate_dbf_to_csv(scenario,'air_conditioning', COLUMNS_AIR_CONDITIONING_3) if 'architecture' not in list_missing_files_dbf_building_properties: migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) @@ -311,12 +311,18 @@ def migrate_cea3_to_cea4(scenario): if 'indoor_comfort' not in list_missing_files_dbf_building_properties: migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + if 'internal_loads' not in list_missing_files_dbf_building_properties: + migrate_dbf_to_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + if 'supply_systems' not in list_missing_files_dbf_building_properties: migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) if 'typology' not in list_missing_files_dbf_building_properties: - os.remove(path_to_input_file_without_db_3(scenario, 'typology')) - print('typology.dbf has been removed as it is no longer needed by CEA-4.') + typology_path = path_to_input_file_without_db_3(scenario, 'typology') + if os.path.exists(typology_path): + os.remove(typology_path) + print('typology.dbf has been removed as it is no longer needed by CEA-4.') + #3. about the Database diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 019e732b44..94b13e8a63 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -214,6 +214,30 @@ def print_verification_results_4(scenario_name, dict_missing): ) +def verify_csv_file(scenario, item, required_columns, print_results=False): + """ + Verify a CSV file's columns and name uniqueness. + + Args: + scenario: The scenario path + item: The item name (e.g., 'air_conditioning') + required_columns: List of required columns + print_results: Whether to print verification results + + Returns: + list: List of missing columns + """ + list_missing_columns = verify_csv_4(scenario, item, required_columns) + if list_missing_columns: + if print_results: + print(f'Ensure column(s) are present in the {item}.csv: {list_missing_columns}') + else: + if 'name' not in list_missing_columns: + list_names_duplicated = verify_name_duplicates_4(scenario, item) + if list_names_duplicated and print_results: + print(f'Ensure name(s) are unique in {item}.csv: {list_names_duplicated} is duplicated.') + return list_missing_columns + ## -------------------------------------------------------------------------------------------------------------------- ## Unique traits for the CEA-4 format ## -------------------------------------------------------------------------------------------------------------------- @@ -259,61 +283,19 @@ def cea4_verify(scenario, print_results=False): print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) if 'air_conditioning' not in list_missing_files_csv_building_properties: - list_missing_columns_air_conditioning = verify_csv_4(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4) - if list_missing_columns_air_conditioning: - if print_results: - print('Ensure column(s) are present in the air_conditioning.csv: {missing_columns_air_conditioning}'.format(missing_columns_air_conditioning=list_missing_columns_air_conditioning)) - else: - if 'name' not in list_missing_columns_air_conditioning: - list_names_duplicated = verify_name_duplicates_4(scenario, 'air_conditioning') - if list_names_duplicated: - if print_results: - print('Ensure name(s) are unique in air_conditioning.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + list_missing_columns_air_conditioning = verify_csv_file(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4, print_results=False) + if 'architecture' not in list_missing_files_csv_building_properties: - list_missing_columns_architecture = verify_csv_4(scenario, 'architecture', COLUMNS_ARCHITECTURE_4) - if list_missing_columns_architecture: - if print_results: - print('Ensure column(s) are present in the architecture.csv: {missing_columns_architecture}'.format(missing_columns_architecture=list_missing_columns_architecture)) - else: - if 'name' not in list_missing_columns_architecture: - list_names_duplicated = verify_name_duplicates_4(scenario, 'architecture') - if list_names_duplicated: - if print_results: - print('Ensure name(s) are unique in architecture.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + list_missing_columns_architecture = verify_csv_file(scenario, 'architecture', COLUMNS_ARCHITECTURE_4, print_results=False) + if 'indoor_comfort' not in list_missing_files_csv_building_properties: - list_missing_columns_indoor_comfort = verify_csv_4(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4) - if list_missing_columns_indoor_comfort: - if print_results: - print('Ensure column(s) are present in the indoor_comfort.csv: {missing_columns_indoor_comfort}'.format(missing_columns_indoor_comfort=list_missing_columns_indoor_comfort)) - else: - if 'name' not in list_missing_columns_indoor_comfort: - list_names_duplicated = verify_name_duplicates_4(scenario, 'indoor_comfort') - if list_names_duplicated: - if print_results: - print('Ensure name(s) are unique in indoor_comfort.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + list_missing_columns_indoor_comfort = verify_csv_file(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4, print_results=False) + if 'internal_loads' not in list_missing_files_csv_building_properties: - list_missing_columns_internal_loads = verify_csv_4(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4) - if list_missing_columns_internal_loads: - if print_results: - print('Ensure column(s) are present in the internal_loads.csv: {missing_columns_internal_loads}'.format(missing_columns_internal_loads=list_missing_columns_internal_loads)) - else: - if 'name' not in list_missing_columns_internal_loads: - list_names_duplicated = verify_name_duplicates_4(scenario, 'internal_loads') - if list_names_duplicated: - if print_results: - print('Ensure name(s) are unique in internal_loads.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) - if 'supply_systems' not in list_missing_files_csv_building_properties: - list_missing_columns_supply_systems = verify_csv_4(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) - if list_missing_columns_supply_systems: - if print_results: - print('Ensure column(s) are present in the supply_systems.csv: {missing_columns_supply_systems}'.format(missing_columns_supply_systems=list_missing_columns_supply_systems)) - else: - if 'name' not in list_missing_columns_supply_systems: - list_names_duplicated = verify_name_duplicates_4(scenario, 'supply_systems') - if list_names_duplicated: - if print_results: - print('Ensure name(s) are unique in supply_systems.csv: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + list_missing_columns_internal_loads = verify_csv_file(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4, print_results=False) + if 'supply_systems' not in list_missing_files_csv_building_properties: + list_missing_columns_supply_systems = verify_csv_file(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4, print_results=False) #3. verify if terrain.tif, weather.epw and streets.shp exist list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) From 20cef51ac3317587637e67d4f600bd015e80394b Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 10:20:45 +0100 Subject: [PATCH 22/29] Update default.config --- cea/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cea/default.config b/cea/default.config index 50fc46ada4..d4644f451e 100644 --- a/cea/default.config +++ b/cea/default.config @@ -617,7 +617,7 @@ scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and mig migrate-from-cea-3 = true migrate-from-cea-3.type = BooleanParameter -migrate-from-cea-3.help = True to auto-migrate the input format to the current version of CEA-4. The process of migration is not reversible. +migrate-from-cea-3.help = True to auto-migrate the input format to the current version of CEA-4 from CEA-3. Note the process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = From 7fccaacd3558c67b90a0eba7a82dd17068a8628f Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 11:16:34 +0100 Subject: [PATCH 23/29] Update cea/datamanagement/format_helper/cea4_verify.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../format_helper/cea4_verify.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 94b13e8a63..8945d37c1d 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -282,21 +282,17 @@ def cea4_verify(scenario, print_results=False): if print_results: print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) - if 'air_conditioning' not in list_missing_files_csv_building_properties: - list_missing_columns_air_conditioning = verify_csv_file(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_4, print_results=False) - - if 'architecture' not in list_missing_files_csv_building_properties: - list_missing_columns_architecture = verify_csv_file(scenario, 'architecture', COLUMNS_ARCHITECTURE_4, print_results=False) - - if 'indoor_comfort' not in list_missing_files_csv_building_properties: - list_missing_columns_indoor_comfort = verify_csv_file(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_4, print_results=False) - - if 'internal_loads' not in list_missing_files_csv_building_properties: - list_missing_columns_internal_loads = verify_csv_file(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_4, print_results=False) - - if 'supply_systems' not in list_missing_files_csv_building_properties: - list_missing_columns_supply_systems = verify_csv_file(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_4, print_results=False) - + for item, columns in [ + ('air_conditioning', COLUMNS_AIR_CONDITIONING_4), + ('architecture', COLUMNS_ARCHITECTURE_4), + ('indoor_comfort', COLUMNS_INDOOR_COMFORT_4), + ('internal_loads', COLUMNS_INTERNAL_LOADS_4), + ('supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) + ]: + if item not in list_missing_files_csv_building_properties: + locals()[f'list_missing_columns_{item}'] = verify_csv_file( + scenario, item, columns, print_results=print_results + ) #3. verify if terrain.tif, weather.epw and streets.shp exist list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) if list_missing_files_terrain: From 60fb7110c127589c57a04b0bd0aaffb4d0195de0 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 11:22:19 +0100 Subject: [PATCH 24/29] Update format_helper.py --- cea/datamanagement/format_helper/format_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index c02dd5b4cb..96298b7a18 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -64,7 +64,7 @@ def main(config): # Start the timer t0 = time.perf_counter() - assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + assert os.path.exists(config.general.project), 'input file not found: %s' % config.general.project project_path = config.general.project scenarios_list = config.format_helper.scenarios_to_verify_and_migrate From 7a5432f3354d7974a209e27148510e00ad4cf3de Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 14:24:14 +0100 Subject: [PATCH 25/29] covering the rare situation --- .../format_helper/cea4_migrate.py | 110 +++++++++++++----- .../format_helper/cea4_verify.py | 18 +-- 2 files changed, 87 insertions(+), 41 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 93a89fc99d..8343130e00 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -210,19 +210,19 @@ def migrate_dbf_to_csv(scenario, item, required_columns): """ list_missing_columns = verify_dbf_3(scenario, item, required_columns) if list_missing_columns: - print(f'Ensure column(s) are present in the {item}.dbf: {list_missing_columns}') + print(f'+ Ensure column(s) are present in the {item}.dbf: {list_missing_columns}') else: if 'Name' not in list_missing_columns: list_names_duplicated = verify_name_duplicates_3(scenario, item) if list_names_duplicated: - print(f'Ensure name(s) are unique in {item}.dbf: {list_names_duplicated} is duplicated.') + print(f'+ Ensure name(s) are unique in {item}.dbf: {list_names_duplicated} is duplicated.') else: df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, item)) df.rename(columns=columns_mapping_dict_name, inplace=True) df.rename(columns=columns_mapping_dict_typology, inplace=True) df.to_csv(path_to_input_file_without_db_4(scenario, item), index=False) os.remove(path_to_input_file_without_db_3(scenario, item)) - print(f'{item}.dbf has been migrated from CEA-3 to CEA-4 format.') + print(f'+ {item}.dbf has been migrated from CEA-3 to CEA-4 format.') ## -------------------------------------------------------------------------------------------------------------------- ## Migrate to CEA-4 format from CEA-3 format @@ -242,24 +242,28 @@ def migrate_cea3_to_cea4(scenario): # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) else: - # Verify missing files for CEA-3 format - list_missing_files_shp_building_geometry = dict_missing.get('building-geometry') - list_missing_files_dbf_building_properties = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) - if list_missing_files_dbf_building_properties: - print('Ensure .dbf file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_dbf_building_properties)) + # Verify missing files for CEA-3 and CEA-4 formats + list_missing_files_shp_building_geometry_4 = dict_missing.get('building-geometry') + list_missing_files_dbf_building_properties_3 = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) + list_missing_files_csv_building_properties_4 = dict_missing.get('building-properties') # Verify missing attributes/columns for CEA-4 format list_missing_attributes_zone_4 = dict_missing.get('zone') list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') + list_missing_columns_air_conditioning_4 = dict_missing.get('air_conditioning') + list_missing_columns_architecture_4 = dict_missing.get('architecture') + list_missing_columns_indoor_comfort_4 = dict_missing.get('indoor_comfort') + list_missing_columns_internal_loads_4 = dict_missing.get('internal_loads') + list_missing_columns_supply_systems_4 = dict_missing.get('supply_systems') #1. about zone.shp and surroundings.shp - if 'zone' not in list_missing_files_shp_building_geometry: + if 'zone' not in list_missing_files_shp_building_geometry_4: list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) - if 'typology' not in list_missing_files_dbf_building_properties: + if 'typology' not in list_missing_files_dbf_building_properties_3: list_missing_attributes_typology_3 = verify_dbf_3(scenario, 'typology', COLUMNS_TYPOLOGY_3) if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') @@ -269,28 +273,33 @@ def migrate_cea3_to_cea4(scenario): zone_df_4.drop(columns=['Name'], inplace=True) zone_df_4 = zone_df_4[COLUMNS_ZONE_4] replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) - print('zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + print('+ zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') else: - raise ValueError('typology.shp exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' + raise ValueError('+ typology.dbf exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3) ) + else: + print("+ CEA is unable to produce a zone.shp compatible to CEA-4 format. To enable the migration, ensure typology.dbf is present in building-properties folder for CEA-3 format.") + elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: pass # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') else: - raise ValueError('zone.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' + raise ValueError('+ zone.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' 'Check the following column(s) for CEA-3 format: {list_missing_attributes_zone_3}.'.format(list_missing_attributes_zone_3=list_missing_attributes_zone_3), 'Check the following column(s) for CEA-4 format: {list_missing_attributes_zone_4}.'.format(list_missing_attributes_zone_4=list_missing_attributes_zone_4) ) + else: + print("+ Ensure zone.shp is present in building-geometry folder.") - if 'surroundings' not in list_missing_files_shp_building_geometry: + if 'surroundings' not in list_missing_files_shp_building_geometry_4: list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) - print('surroundings.shp has been migrated to CEA-4 format.') + print('+ surroundings.shp has been migrated from CEA-3 to CEA-4 format.') elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: pass @@ -300,28 +309,65 @@ def migrate_cea3_to_cea4(scenario): 'Check the following column(s) for CEA-3 format: {list_missing_attributes_surroundings_3}.'.format(list_missing_attributes_surroundings_3=list_missing_attributes_surroundings_3), 'Check the following column(s) for CEA-4 format: {list_missing_attributes_surroundings_4}.'.format(list_missing_attributes_surroundings_4=list_missing_attributes_surroundings_4) ) + else: + print('+ (optional) Run Surroundings-Helper to generate surroundings.shp.') - #2. about the .dbf files in the building-properties folde to be mirgrated to .csv files - if 'air_conditioning' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv(scenario,'air_conditioning', COLUMNS_AIR_CONDITIONING_3) - - if 'architecture' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) - - if 'indoor_comfort' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) - - if 'internal_loads' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) - - if 'supply_systems' not in list_missing_files_dbf_building_properties: - migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + #2. about the .dbf files in the building-properties folder to be migrated to .csv files + if 'air_conditioning' in list_missing_files_csv_building_properties_4 and not list_missing_columns_air_conditioning_4: + if 'air_conditioning' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) + else: + print("+ Ensure either air_conditioning.dbf or air_conditioning.csv is present in building-properties folder. Run Archetypes-Helper to generate air_conditioning.csv.") + elif 'air_conditioning' not in list_missing_files_csv_building_properties_4 and list_missing_columns_air_conditioning_4: + print('+ Ensure column(s) are present in air_conditioning.csv: {list_missing_columns_air_conditioning_4}.'.format(list_missing_columns_air_conditioning_4=list_missing_columns_air_conditioning_4)) + else: + pass + + if 'architecture' in list_missing_files_csv_building_properties_4 and not list_missing_columns_architecture_4: + if 'architecture' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) + else: + print("+ Ensure either architecture.dbf or architecture.csv is present in building-properties folder. Run Archetypes-Helper to generate architecture.csv.") + elif 'architecture' not in list_missing_files_csv_building_properties_4 and list_missing_columns_architecture_4: + print('Ensure column(s) are present in architecture.csv: {list_missing_columns_architecture_4}.'.format(list_missing_columns_architecture_4=list_missing_columns_architecture_4)) + else: + pass + + if 'indoor_comfort' in list_missing_files_csv_building_properties_4 and not list_missing_columns_indoor_comfort_4: + if 'indoor_comfort' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + else: + print("+ Ensure either indoor_comfort.dbf or indoor_comfort.csv is present in building-properties folder. Run Archetypes-Helper to generate indoor_comfort.csv.") + elif 'indoor_comfort' not in list_missing_files_csv_building_properties_4 and list_missing_columns_indoor_comfort_4: + print('+ Ensure column(s) are present in indoor_comfort.csv: {list_missing_columns_indoor_comfort_4}.'.format(list_missing_columns_indoor_comfort_4=list_missing_columns_indoor_comfort_4)) + else: + pass + + if 'internal_loads' in list_missing_files_csv_building_properties_4 and not list_missing_columns_internal_loads_4: + if 'internal_loads' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + else: + print("+ Ensure either internal_loads.dbf or internal_loads.csv is present in building-properties folder. Run Archetypes-Helper to generate internal_loads.csv.") + elif 'internal_loads' not in list_missing_files_csv_building_properties_4 and list_missing_columns_internal_loads_4: + print('+ Ensure column(s) are present in internal_loads.csv: {list_missing_columns_internal_loads_4}.'.format(list_missing_columns_internal_loads_4=list_missing_columns_internal_loads_4)) + else: + pass + + if 'supply_systems' in list_missing_files_csv_building_properties_4 and not list_missing_columns_supply_systems_4: + if 'supply_systems' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + else: + print("+ Ensure either supply_systems.dbf or supply_systems.csv is present in building-properties folder. Run Archetypes-Helper to generate supply_systems.csv.") + elif 'supply_systems' not in list_missing_files_csv_building_properties_4 and list_missing_columns_supply_systems_4: + print('+ Ensure column(s) are present in supply_systems.csv: {list_missing_columns_supply_system_4}.'.format(list_missing_columns_supply_system_4=list_missing_columns_supply_systems_4)) + else: + pass - if 'typology' not in list_missing_files_dbf_building_properties: + if 'typology' not in list_missing_files_dbf_building_properties_3: typology_path = path_to_input_file_without_db_3(scenario, 'typology') if os.path.exists(typology_path): os.remove(typology_path) - print('typology.dbf has been removed as it is no longer needed by CEA-4.') + print('+ typology.dbf has been removed as it is no longer needed by CEA-4.') #3. about the Database diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index 8945d37c1d..c910998ec3 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -230,7 +230,7 @@ def verify_csv_file(scenario, item, required_columns, print_results=False): list_missing_columns = verify_csv_4(scenario, item, required_columns) if list_missing_columns: if print_results: - print(f'Ensure column(s) are present in the {item}.csv: {list_missing_columns}') + print(f'+ Ensure column(s) are present in the {item}.csv: {list_missing_columns}') else: if 'name' not in list_missing_columns: list_names_duplicated = verify_name_duplicates_4(scenario, item) @@ -253,22 +253,22 @@ def cea4_verify(scenario, print_results=False): list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) if list_missing_attributes_zone: if print_results: - print('Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + print('+ Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) if 'name' not in list_missing_attributes_zone: list_names_duplicated = verify_name_duplicates_4(scenario, 'zone') if list_names_duplicated: if print_results: - print('Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('+ Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) if 'surroundings' not in list_missing_files_shp_building_geometry: list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) if list_missing_attributes_surroundings: if print_results: - print('Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + print('+ Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) if 'name' not in list_missing_attributes_surroundings: list_names_duplicated = verify_name_duplicates_4(scenario, 'surroundings') if list_names_duplicated: if print_results: - print('Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + print('+ Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) #2. about .csv files under the "inputs/building-properties" folder list_missing_columns_air_conditioning = [] @@ -280,7 +280,7 @@ def cea4_verify(scenario, print_results=False): list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) if list_missing_files_csv_building_properties: if print_results: - print('Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + print('+ Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) for item, columns in [ ('air_conditioning', COLUMNS_AIR_CONDITIONING_4), @@ -297,17 +297,17 @@ def cea4_verify(scenario, print_results=False): list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) if list_missing_files_terrain: if print_results: - print('Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + print('+ Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) if list_missing_files_weather: if print_results: - print('Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + print('+ Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) if list_missing_files_streets: if print_results: - print('Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') + print('+ Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') #4. verify the DB under the "inputs/technology/" folder list_missing_files_db = [] From 8e0a209bcc3649142ba9df7ff11f9c6e108f8119 Mon Sep 17 00:00:00 2001 From: Reynold Mok <34395415+reyery@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:27:16 +0100 Subject: [PATCH 26/29] Remove unused env set up --- .../format_helper/format_helper.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 96298b7a18..62862c0866 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -20,16 +20,6 @@ __status__ = "Production" -## -------------------------------------------------------------------------------------------------------------------- -## Get the environment and set up the subprocess -## -------------------------------------------------------------------------------------------------------------------- - - -# adding CEA to the environment -# Fix for running in PyCharm for users using micromamba -my_env = os.environ.copy() -my_env['PATH'] = f"{os.path.dirname(sys.executable)}:{my_env['PATH']}" - def exec_cea_format_helper(config, cea_scenario): # auto-migrate from CEA-3 to CEA-4 bool_migrate = config.format_helper.migrate_from_cea_3 @@ -47,7 +37,6 @@ def exec_cea_format_helper(config, cea_scenario): print_verification_results_4(scenario_name, dict_missing) - ## -------------------------------------------------------------------------------------------------------------------- ## Main function ## -------------------------------------------------------------------------------------------------------------------- @@ -70,7 +59,7 @@ def main(config): scenarios_list = config.format_helper.scenarios_to_verify_and_migrate print('+' * 60) - print('Format Helper is batch-processing the data verification and migration for Scenarios: {scenarios_list}.'.format(scenarios_list=scenarios_list)) + print(f'Format Helper is batch-processing the data verification and migration for Scenarios: {scenarios_list}.') # Loop over one or all selected scenarios under the project for scenario in scenarios_list: @@ -90,7 +79,8 @@ def main(config): # Print the time used for the entire processing time_elapsed = time.perf_counter() - t0 print('+' * 60) - print('The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) + print( + 'The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) if __name__ == '__main__': From 6a9a3364dfdf1bb52862c4b064150038ba682536 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 16:36:24 +0100 Subject: [PATCH 27/29] typology having zone data --- cea/datamanagement/format_helper/cea4_migrate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 8343130e00..6e77cb6444 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -54,6 +54,7 @@ '3RD_USE': 'use_type3', '3RD_USE_R': 'use_type3r' } +COLUMNS_ZONE_TYPOLOGY_3 = ['Name', 'STANDARD', 'YEAR', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] ## -------------------------------------------------------------------------------------------------------------------- ## The paths to the input files for CEA-3 @@ -268,6 +269,7 @@ def migrate_cea3_to_cea4(scenario): if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) + typology_df = typology_df[COLUMNS_ZONE_TYPOLOGY_3] typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') zone_df_4.drop(columns=['Name'], inplace=True) From 185d26308004c91d45eaf9117778292c3e52611c Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 16:37:43 +0100 Subject: [PATCH 28/29] ruff 5 --- cea/datamanagement/format_helper/format_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 62862c0866..73a0de2a9b 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -4,7 +4,6 @@ """ import os -import sys import cea.config import time from cea.datamanagement.format_helper.cea4_verify import cea4_verify, print_verification_results_4 From 966662f567f4010e075f5322eaca3e7b8e1b84b2 Mon Sep 17 00:00:00 2001 From: Zhongming Shi Date: Mon, 13 Jan 2025 16:42:28 +0100 Subject: [PATCH 29/29] - --- cea/datamanagement/format_helper/cea4_migrate.py | 4 ++-- cea/datamanagement/format_helper/cea4_verify.py | 4 ++-- cea/datamanagement/format_helper/format_helper.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py index 6e77cb6444..96e4e4fe96 100644 --- a/cea/datamanagement/format_helper/cea4_migrate.py +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -392,8 +392,8 @@ def main(config): scenario_name = os.path.basename(scenario) # Print: Start - div_len = 47 - len(scenario_name) - print('-' * 60) + div_len = 37 - len(scenario_name) + print('-' * 50) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) # Execute the verification diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py index c910998ec3..19861a6aa2 100644 --- a/cea/datamanagement/format_helper/cea4_verify.py +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -331,7 +331,7 @@ def cea4_verify(scenario, print_results=False): # Print: End if print_results: - print("-" * 60) + print("-" * 50) return dict_missing @@ -351,7 +351,7 @@ def main(config): scenario_name = os.path.basename(scenario) # Print: Start - div_len = 47 - len(scenario_name) + div_len = 37 - len(scenario_name) print('+' * 60) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py index 73a0de2a9b..5b8fe7da97 100644 --- a/cea/datamanagement/format_helper/format_helper.py +++ b/cea/datamanagement/format_helper/format_helper.py @@ -67,7 +67,7 @@ def main(config): continue # Print: Start - div_len = 47 - len(scenario) + div_len = 37 - len(scenario) print('+' * 60) print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario) + "-" * div_len)