diff --git a/cea/datamanagement/format_helper/__init__.py b/cea/datamanagement/format_helper/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py new file mode 100644 index 000000000..96e4e4fe9 --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -0,0 +1,417 @@ +""" +Mirgate the format of the input data to CEA-4 format after verification. + +""" + + +import os +import cea.config +import time +import pandas as pd +import geopandas as gpd + + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, \ + COLUMNS_ZONE_4, print_verification_results_4, path_to_input_file_without_db_4 +from cea.utilities.dbf import dbf_to_dataframe + +COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] +CSV_BUILDING_PROPERTIES_3 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', 'typology'] + +COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] +COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] +COLUMNS_AIR_CONDITIONING_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_3 = ['Name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_3 = ['Name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 
'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_3 = ['Name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] +columns_mapping_dict_name = {'Name': 'name'} +columns_mapping_dict_typology = {'YEAR': 'year', + 'STANDARD': 'const_type', + '1ST_USE': 'use_type1', + '1ST_USE_R': 'use_type1r', + '2ND_USE': 'use_type2', + '2ND_USE_R': 'use_type2r', + '3RD_USE': 'use_type3', + '3RD_USE_R': 'use_type3r' + } +COLUMNS_ZONE_TYPOLOGY_3 = ['Name', 'STANDARD', 'YEAR', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files for CEA-3 +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. +# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. 
+def path_to_input_file_without_db_3(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.dbf") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.dbf") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.dbf") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.dbf") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.dbf") + elif item == "typology": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "typology.dbf") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") + + return path_to_input_file + + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + + +def verify_name_duplicates_3(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). 
+
+    Returns:
+    list: A list of duplicate names, or an empty list if no duplicates are found.
+    """
+    # Construct the input file path
+    file_path = path_to_input_file_without_db_3(scenario, item)
+
+    # Check file type and load as a DataFrame
+    if file_path.endswith('.dbf'):
+        try:
+            df = dbf_to_dataframe(file_path)
+        except Exception as e:
+            raise ValueError(f"Error reading DBF file: {e}")
+    elif file_path.endswith('.shp'):
+        try:
+            df = gpd.read_file(file_path)
+        except Exception as e:
+            raise ValueError(f"Error reading shapefile: {e}")
+    else:
+        # This CEA-3 helper only handles .dbf and .shp inputs (fixed: message used to say .csv).
+        raise ValueError("Unsupported file type. Please provide a .dbf or .shp file.")
+
+    # Find duplicate names
+    list_names_duplicated = df['Name'][df['Name'].duplicated()].tolist()
+
+    return list_names_duplicated
+
+def verify_dbf_3(scenario, item, required_columns):
+    """
+    Verify if a DBF file contains all required columns.
+
+    Parameters:
+    scenario (str): Path or identifier for the scenario.
+    item (str): Identifier for the DBF file.
+    required_columns (list): List of column names to verify.
+
+    Returns:
+    A list of missing columns, or an empty list if all columns are present.
+    """
+    # Construct the DBF file path
+    dbf_path = path_to_input_file_without_db_3(scenario, item)
+
+    # Check if the DBF file exists (fixed: messages/comments used to say "CSV")
+    if not os.path.isfile(dbf_path):
+        raise FileNotFoundError(f"DBF file not found: {dbf_path}")
+
+    # Load the DBF file
+    try:
+        df = dbf_to_dataframe(dbf_path)
+    except Exception as e:
+        raise ValueError(f"Error reading DBF file: {e}")
+
+    # Get the column names from the DBF file
+    dbf_columns = df.columns.tolist()
+
+    # Check for missing columns
+    missing_columns = [col for col in required_columns if col not in dbf_columns]
+
+    return missing_columns
+
+
+def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3):
+    """
+    Replace the DBF file of a shapefile with the contents of a new DataFrame,
+    ensuring matching of `['Name']` in the shapefile and `['name']` in the new DataFrame. 
+ + :param shapefile_path: Path to the shapefile (without file extension). + :param new_dataframe: pandas DataFrame with the new data to replace the DBF file. + """ + # Load the original shapefile + shapefile_path = path_to_input_file_without_db_3(scenario, item) + gdf = gpd.read_file(shapefile_path) + + # Convert the DataFrame to a GeoDataFrame + new_gdf = gpd.GeoDataFrame(new_dataframe, geometry=gdf['geometry'], crs=gdf.crs) # Replace CRS with your specific CRS + + # Save the updated shapefile + new_gdf.to_file(shapefile_path, driver="ESRI Shapefile") + +def verify_file_exists_3(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. + items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db_3(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files + + +def migrate_dbf_to_csv(scenario, item, required_columns): + """ + Migrate a DBF file to CSV format with column renaming. 
+
+    Args:
+        scenario: The scenario path
+        item: The item name (e.g., 'air_conditioning')
+        required_columns: List of required columns
+    """
+    list_missing_columns = verify_dbf_3(scenario, item, required_columns)
+    if list_missing_columns:
+        print(f'+ Ensure column(s) are present in the {item}.dbf: {list_missing_columns}')
+    else:
+        # All required columns (including 'Name') are present here, so it is safe to
+        # check for duplicate names before migrating. (Removed an always-true
+        # `if 'Name' not in list_missing_columns` guard: the list is empty in this branch.)
+        list_names_duplicated = verify_name_duplicates_3(scenario, item)
+        if list_names_duplicated:
+            print(f'+ Ensure name(s) are unique in {item}.dbf: {list_names_duplicated} is duplicated.')
+        else:
+            # Rename CEA-3 column names to CEA-4, write the .csv, then drop the old .dbf.
+            df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, item))
+            df.rename(columns=columns_mapping_dict_name, inplace=True)
+            df.rename(columns=columns_mapping_dict_typology, inplace=True)
+            df.to_csv(path_to_input_file_without_db_4(scenario, item), index=False)
+            os.remove(path_to_input_file_without_db_3(scenario, item))
+            print(f'+ {item}.dbf has been migrated from CEA-3 to CEA-4 format.')
+
+## --------------------------------------------------------------------------------------------------------------------
+## Migrate to CEA-4 format from CEA-3 format
+## --------------------------------------------------------------------------------------------------------------------
+
+def migrate_cea3_to_cea4(scenario):
+
+    #0. 
verify if everything is already in the correct format for CEA-4 + dict_missing = cea4_verify(scenario) + if all(not value for value in dict_missing.values()): + pass + # print("✓" * 3) + # print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + # 'input building-geometries ShapeFiles: [zone and surroundings], ' + # 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), + # ) + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + + else: + # Verify missing files for CEA-3 and CEA-4 formats + list_missing_files_shp_building_geometry_4 = dict_missing.get('building-geometry') + list_missing_files_dbf_building_properties_3 = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) + list_missing_files_csv_building_properties_4 = dict_missing.get('building-properties') + + # Verify missing attributes/columns for CEA-4 format + list_missing_attributes_zone_4 = dict_missing.get('zone') + list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') + list_missing_columns_air_conditioning_4 = dict_missing.get('air_conditioning') + list_missing_columns_architecture_4 = dict_missing.get('architecture') + list_missing_columns_indoor_comfort_4 = dict_missing.get('indoor_comfort') + list_missing_columns_internal_loads_4 = dict_missing.get('internal_loads') + list_missing_columns_supply_systems_4 = dict_missing.get('supply_systems') + + #1. 
about zone.shp and surroundings.shp + if 'zone' not in list_missing_files_shp_building_geometry_4: + list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) + if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') + zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) + zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) + if 'typology' not in list_missing_files_dbf_building_properties_3: + list_missing_attributes_typology_3 = verify_dbf_3(scenario, 'typology', COLUMNS_TYPOLOGY_3) + if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') + typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) + typology_df = typology_df[COLUMNS_ZONE_TYPOLOGY_3] + typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') + zone_df_4.drop(columns=['Name'], inplace=True) + zone_df_4 = zone_df_4[COLUMNS_ZONE_4] + replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) + print('+ zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + else: + raise ValueError('+ typology.dbf exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3) + ) + else: + print("+ CEA is unable to produce a zone.shp compatible to CEA-4 format. 
To enable the migration, ensure typology.dbf is present in building-properties folder for CEA-3 format.") + + elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') + else: + raise ValueError('+ zone.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_zone_3}.'.format(list_missing_attributes_zone_3=list_missing_attributes_zone_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_zone_4}.'.format(list_missing_attributes_zone_4=list_missing_attributes_zone_4) + ) + else: + print("+ Ensure zone.shp is present in building-geometry folder.") + + if 'surroundings' not in list_missing_files_shp_building_geometry_4: + list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) + if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') + surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) + surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) + replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) + print('+ surroundings.shp has been migrated from CEA-3 to CEA-4 format.') + + elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') + else: + raise ValueError('surroundings.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' 
+ 'Check the following column(s) for CEA-3 format: {list_missing_attributes_surroundings_3}.'.format(list_missing_attributes_surroundings_3=list_missing_attributes_surroundings_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_surroundings_4}.'.format(list_missing_attributes_surroundings_4=list_missing_attributes_surroundings_4) + ) + else: + print('+ (optional) Run Surroundings-Helper to generate surroundings.shp.') + + #2. about the .dbf files in the building-properties folder to be migrated to .csv files + if 'air_conditioning' in list_missing_files_csv_building_properties_4 and not list_missing_columns_air_conditioning_4: + if 'air_conditioning' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) + else: + print("+ Ensure either air_conditioning.dbf or air_conditioning.csv is present in building-properties folder. Run Archetypes-Helper to generate air_conditioning.csv.") + elif 'air_conditioning' not in list_missing_files_csv_building_properties_4 and list_missing_columns_air_conditioning_4: + print('+ Ensure column(s) are present in air_conditioning.csv: {list_missing_columns_air_conditioning_4}.'.format(list_missing_columns_air_conditioning_4=list_missing_columns_air_conditioning_4)) + else: + pass + + if 'architecture' in list_missing_files_csv_building_properties_4 and not list_missing_columns_architecture_4: + if 'architecture' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) + else: + print("+ Ensure either architecture.dbf or architecture.csv is present in building-properties folder. 
Run Archetypes-Helper to generate architecture.csv.") + elif 'architecture' not in list_missing_files_csv_building_properties_4 and list_missing_columns_architecture_4: + print('Ensure column(s) are present in architecture.csv: {list_missing_columns_architecture_4}.'.format(list_missing_columns_architecture_4=list_missing_columns_architecture_4)) + else: + pass + + if 'indoor_comfort' in list_missing_files_csv_building_properties_4 and not list_missing_columns_indoor_comfort_4: + if 'indoor_comfort' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + else: + print("+ Ensure either indoor_comfort.dbf or indoor_comfort.csv is present in building-properties folder. Run Archetypes-Helper to generate indoor_comfort.csv.") + elif 'indoor_comfort' not in list_missing_files_csv_building_properties_4 and list_missing_columns_indoor_comfort_4: + print('+ Ensure column(s) are present in indoor_comfort.csv: {list_missing_columns_indoor_comfort_4}.'.format(list_missing_columns_indoor_comfort_4=list_missing_columns_indoor_comfort_4)) + else: + pass + + if 'internal_loads' in list_missing_files_csv_building_properties_4 and not list_missing_columns_internal_loads_4: + if 'internal_loads' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + else: + print("+ Ensure either internal_loads.dbf or internal_loads.csv is present in building-properties folder. 
Run Archetypes-Helper to generate internal_loads.csv.") + elif 'internal_loads' not in list_missing_files_csv_building_properties_4 and list_missing_columns_internal_loads_4: + print('+ Ensure column(s) are present in internal_loads.csv: {list_missing_columns_internal_loads_4}.'.format(list_missing_columns_internal_loads_4=list_missing_columns_internal_loads_4)) + else: + pass + + if 'supply_systems' in list_missing_files_csv_building_properties_4 and not list_missing_columns_supply_systems_4: + if 'supply_systems' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + else: + print("+ Ensure either supply_systems.dbf or supply_systems.csv is present in building-properties folder. Run Archetypes-Helper to generate supply_systems.csv.") + elif 'supply_systems' not in list_missing_files_csv_building_properties_4 and list_missing_columns_supply_systems_4: + print('+ Ensure column(s) are present in supply_systems.csv: {list_missing_columns_supply_system_4}.'.format(list_missing_columns_supply_system_4=list_missing_columns_supply_systems_4)) + else: + pass + + if 'typology' not in list_missing_files_dbf_building_properties_3: + typology_path = path_to_input_file_without_db_3(scenario, 'typology') + if os.path.exists(typology_path): + os.remove(typology_path) + print('+ typology.dbf has been removed as it is no longer needed by CEA-4.') + + + #3. 
about the Database + + + # Print: End + print("-" * 60) + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + scenario = config.scenario + scenario_name = os.path.basename(scenario) + + # Print: Start + div_len = 37 - len(scenario_name) + print('-' * 50) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + + # Execute the verification + migrate_cea3_to_cea4(scenario) + + # Execute the verification again + dict_missing = cea4_verify(scenario) + + # Print the verification results + print_verification_results_4(scenario_name, dict_missing) + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 60) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('The entire process of data migration from CEA-3 to CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py new file mode 100644 index 000000000..19861a6aa --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -0,0 +1,373 @@ +""" +Verify the format of the input data for CEA-4 model. 
+ +""" + +import os +import cea.config +import time +import geopandas as gpd +import pandas as pd + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + + +SHAPEFILES = ['zone', 'surroundings'] +COLUMNS_ZONE_4 = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', + 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] +COLUMNS_SURROUNDINGS_4 = ['name', 'height_ag', 'floors_ag'] +CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] +COLUMNS_AIR_CONDITIONING_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_4 = ['name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_4 = ['name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_4 = ['name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files for CEA-4 +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. 
+# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. +def path_to_input_file_without_db_4(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.csv") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.csv") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.csv") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.csv") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.csv") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") + + return path_to_input_file + + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + +def verify_shp(scenario, item, required_attributes): + """ + Verify if a shapefile contains all required attributes. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Either "zone" or "surroundings". 
+ required_attributes (list): List of attribute names to verify. + + Returns: + A list of missing attributes, or an empty list if all attributes are present. + """ + # Construct the shapefile path + shapefile_path = path_to_input_file_without_db_4(scenario, item) + + # Check if the shapefile exists + if not os.path.isfile(shapefile_path): + raise FileNotFoundError(f"Shapefile not found: {shapefile_path}") + + # Load the shapefile + try: + gdf = gpd.read_file(shapefile_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + + # Get the column names from the shapefile's attribute table + shapefile_columns = gdf.columns.tolist() + + # Check for missing attributes + missing_attributes = [attr for attr in required_attributes if attr not in shapefile_columns] + + return missing_attributes + + +def verify_csv_4(scenario, item, required_columns): + """ + Verify if a CSV file contains all required columns. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Identifier for the CSV file. + required_columns (list): List of column names to verify. + + Returns: + A list of missing columns, or an empty list if all columns are present. + """ + # Construct the CSV file path + csv_path = path_to_input_file_without_db_4(scenario, item) + + # Check if the CSV file exists + if not os.path.isfile(csv_path): + raise FileNotFoundError(f"CSV file not found: {csv_path}") + + # Load the CSV file + try: + df = pd.read_csv(csv_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + + # Get the column names from the CSV file + csv_columns = df.columns.tolist() + + # Check for missing columns + missing_columns = [col for col in required_columns if col not in csv_columns] + + return missing_columns + + +def verify_file_exists_4(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. 
+ items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db_4(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files + + +def verify_name_duplicates_4(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). + + Returns: + list: A list of duplicate names, or an empty list if no duplicates are found. + """ + # Construct the CSV file path + file_path = path_to_input_file_without_db_4(scenario, item) + + # Check file type and load as a DataFrame + if file_path.endswith('.csv'): + try: + df = pd.read_csv(file_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + elif file_path.endswith('.shp'): + try: + df = gpd.read_file(file_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + else: + raise ValueError("Unsupported file type. Please provide a .csv or .shp file.") + + # Find duplicate names + list_names_duplicated = df['name'][df['name'].duplicated()].tolist() + + return list_names_duplicated + + +def print_verification_results_4(scenario_name, dict_missing): + + if all(not value for value in dict_missing.values()): + print("✓" * 3) + print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + 'input building-geometries ShapeFiles: [zone and surroundings], ' + 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4) + ) + else: + print("!" * 3) + print('All or some of input data files/columns are missing or incompatible with the current version of CEA-4 for Scenario: {scenario}. 
'.format(scenario=scenario_name), + 'If you are migrating your input data from CEA-3 to CEA-4 format, set the toggle `migrate_from_cea_3` to `True` and run the script again. ' + 'If you manually prepared the input data, check the log for missing files and/or incompatible columns. Modify your input data according to the log above.' + ) + + +def verify_csv_file(scenario, item, required_columns, print_results=False): + """ + Verify a CSV file's columns and name uniqueness. + + Args: + scenario: The scenario path + item: The item name (e.g., 'air_conditioning') + required_columns: List of required columns + print_results: Whether to print verification results + + Returns: + list: List of missing columns + """ + list_missing_columns = verify_csv_4(scenario, item, required_columns) + if list_missing_columns: + if print_results: + print(f'+ Ensure column(s) are present in the {item}.csv: {list_missing_columns}') + else: + if 'name' not in list_missing_columns: + list_names_duplicated = verify_name_duplicates_4(scenario, item) + if list_names_duplicated and print_results: + print(f'Ensure name(s) are unique in {item}.csv: {list_names_duplicated} is duplicated.') + return list_missing_columns + +## -------------------------------------------------------------------------------------------------------------------- +## Unique traits for the CEA-4 format +## -------------------------------------------------------------------------------------------------------------------- + +def cea4_verify(scenario, print_results=False): + + #1. 
about zone.shp and surroundings.shp + list_missing_attributes_zone = [] + list_missing_attributes_surroundings = [] + list_missing_files_shp_building_geometry = verify_file_exists_4(scenario, SHAPEFILES) + + if 'zone' not in list_missing_files_shp_building_geometry: + list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) + if list_missing_attributes_zone: + if print_results: + print('+ Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + if 'name' not in list_missing_attributes_zone: + list_names_duplicated = verify_name_duplicates_4(scenario, 'zone') + if list_names_duplicated: + if print_results: + print('+ Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'surroundings' not in list_missing_files_shp_building_geometry: + list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) + if list_missing_attributes_surroundings: + if print_results: + print('+ Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + if 'name' not in list_missing_attributes_surroundings: + list_names_duplicated = verify_name_duplicates_4(scenario, 'surroundings') + if list_names_duplicated: + if print_results: + print('+ Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + + #2. 
about .csv files under the "inputs/building-properties" folder + list_missing_columns_air_conditioning = [] + list_missing_columns_architecture = [] + list_missing_columns_indoor_comfort = [] + list_missing_columns_internal_loads = [] + list_missing_columns_supply_systems = [] + + list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) + if list_missing_files_csv_building_properties: + if print_results: + print('+ Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + + for item, columns in [ + ('air_conditioning', COLUMNS_AIR_CONDITIONING_4), + ('architecture', COLUMNS_ARCHITECTURE_4), + ('indoor_comfort', COLUMNS_INDOOR_COMFORT_4), + ('internal_loads', COLUMNS_INTERNAL_LOADS_4), + ('supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) + ]: + if item not in list_missing_files_csv_building_properties: + locals()[f'list_missing_columns_{item}'] = verify_csv_file( + scenario, item, columns, print_results=print_results + ) + #3. verify if terrain.tif, weather.epw and streets.shp exist + list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) + if list_missing_files_terrain: + if print_results: + print('+ Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + + list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) + if list_missing_files_weather: + if print_results: + print('+ Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + + list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) + if list_missing_files_streets: + if print_results: + print('+ Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') + + #4. 
verify the DB under the "inputs/technology/" folder + list_missing_files_db = [] + + # Compile the results + dict_missing = { + 'building-geometry': list_missing_files_shp_building_geometry, + 'zone': list_missing_attributes_zone, + 'surroundings': list_missing_attributes_surroundings, + 'building-properties': list_missing_files_csv_building_properties, + 'air_conditioning': list_missing_columns_air_conditioning, + 'architecture': list_missing_columns_architecture, + 'indoor_comfort': list_missing_columns_indoor_comfort, + 'internal_loads': list_missing_columns_internal_loads, + 'supply_systems': list_missing_columns_supply_systems, + 'terrain': list_missing_files_terrain, + 'weather': list_missing_files_weather, + 'streets': list_missing_files_streets, + 'db': list_missing_files_db + } + + # Print: End + if print_results: + print("-" * 50) + + return dict_missing + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + # Get the scenario name + scenario = config.scenario + scenario_name = os.path.basename(scenario) + + # Print: Start + div_len = 37 - len(scenario_name) + print('+' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + + # Execute the verification + dict_missing = cea4_verify(scenario, print_results=True) + + # Print the results + print_verification_results_4(scenario_name, dict_missing) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 60) + print('The entire process of 
CEA-4 format verification is now completed - time elapsed: %.2f seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py new file mode 100644 index 000000000..5b8fe7da9 --- /dev/null +++ b/cea/datamanagement/format_helper/format_helper.py @@ -0,0 +1,86 @@ +""" +Main script of the format helper that activates the verification and migration as needed. + +""" + +import os +import cea.config +import time +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, print_verification_results_4 +from cea.datamanagement.format_helper.cea4_migrate import migrate_cea3_to_cea4 + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + + +def exec_cea_format_helper(config, cea_scenario): + # auto-migrate from CEA-3 to CEA-4 + bool_migrate = config.format_helper.migrate_from_cea_3 + scenario_name = os.path.basename(cea_scenario) + + if not bool_migrate: + # subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) + dict_missing = cea4_verify(cea_scenario, print_results=True) + print_verification_results_4(scenario_name, dict_missing) + + else: + # subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) + migrate_cea3_to_cea4(cea_scenario) + dict_missing = cea4_verify(cea_scenario) + print_verification_results_4(scenario_name, dict_missing) + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +##
-------------------------------------------------------------------------------------------------------------------- + +def main(config): + """ + Batch processing all selected scenarios under a project. + + :param config: the configuration object to use + :type config: cea.config.Configuration + :return: + """ + + # Start the timer + t0 = time.perf_counter() + + assert os.path.exists(config.general.project), 'input file not found: %s' % config.general.project + + project_path = config.general.project + scenarios_list = config.format_helper.scenarios_to_verify_and_migrate + + print('+' * 60) + print(f'Format Helper is batch-processing the data verification and migration for Scenarios: {scenarios_list}.') + + # Loop over one or all selected scenarios under the project + for scenario in scenarios_list: + # Ignore hidden directories + if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): + continue + + # Print: Start + div_len = 37 - len(scenario) + print('+' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario) + "-" * div_len) + + cea_scenario = os.path.join(project_path, scenario) + # executing CEA commands + exec_cea_format_helper(config, cea_scenario) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('+' * 60) + print( + 'The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) + + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/default.config b/cea/default.config index 7a61de74b..d4644f451 100644 --- a/cea/default.config +++ b/cea/default.config @@ -360,9 +360,9 @@ operational.type = BooleanParameter operational.help = Estimate Operational costs (variable and fixed) due to supply systems operation (allocated according to the supply-systems input database) [emissions] -year-to-calculate = 2024 +year-to-calculate = 2025 year-to-calculate.type = 
IntegerParameter -year-to-calculate.help = Defines the time period (year-to-calculate - year_built or year_renovated) over which embodied emissions are calculated. If greater than 60 years, embodied emissions are not considered. +year-to-calculate.help = Defines the year to calculate the emissions. If a building is older than 60 years, embodied emissions are not considered. embodied = true embodied.type = BooleanParameter @@ -610,11 +610,19 @@ variable-5-upper-bound.type = RealParameter variable-5-upper-bound.help = Upper bound of Variable 5. variable-5-upper-bound.nullable = true +[format-helper] +scenarios-to-verify-and-migrate = +scenarios-to-verify-and-migrate.type = ScenarioNameMultiChoiceParameter +scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and migrate). Leave blank to select all Scenarios. + +migrate-from-cea-3 = true +migrate-from-cea-3.type = BooleanParameter +migrate-from-cea-3.help = True to auto-migrate the input format to the current version of CEA-4 from CEA-3. Note the process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = scenarios-to-simulate.type = ScenarioNameMultiChoiceParameter -scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all scenarios. Consider excluding the reference-Scenario when import-from-rhino-gh is set to True. +scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all Scenarios. Exclude the reference-Scenario when import-from-rhino-gh is set to True. export-to-rhino-gh = false export-to-rhino-gh.type = BooleanParameter diff --git a/cea/scripts.yml b/cea/scripts.yml index 964d870c0..fe5ad1993 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -357,6 +357,34 @@ Data Management: Utilities: + - name: cea4-format-helper + label: CEA-4 Format Helper + description: | + Verifies the inputs are in the correct format for CEA-4.
+ Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + + interfaces: [cli, dashboard] + module: cea.datamanagement.format_helper.format_helper + parameters: ['general:scenario', format-helper] + + - name: cea4-migrate + label: CEA-4 Format Helper - Migrate + description: | + Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_migrate + parameters: ['general:scenario'] + + - name: cea4-verify + label: CEA-4 Format Helper - Verify + description: | + Verifies the inputs are in the correct format for CEA-4. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_verify + parameters: ['general:scenario'] + - name: sensitivity-analysis-sampler label: Generate Samples for Sensitivity Analysis (SA) description: Generate samples for sensitivity analysis using Sobol Method. diff --git a/cea/utilities/batch_process_workflow.py b/cea/utilities/batch_process_workflow.py index 9e2dc7862..95523e246 100644 --- a/cea/utilities/batch_process_workflow.py +++ b/cea/utilities/batch_process_workflow.py @@ -156,7 +156,7 @@ def exec_cea_commands(config, cea_scenario): def main(config): """ - Batch processing all scenarios under a project. + Batch processing all selected scenarios under a project.
:param config: the configuration object to use :type config: cea.config.Configuration @@ -172,7 +172,7 @@ def main(config): scenario_name = config.general.scenario_name scenarios_list = config.batch_process_workflow.scenarios_to_simulate - # Loop over one or all scenarios under the project + # Loop over one or all selected scenarios under the project for scenario in scenarios_list: # Ignore hidden directories if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): @@ -184,7 +184,7 @@ def main(config): # executing CEA commands exec_cea_commands(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}") + print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") err_msg = e.stderr if err_msg is not None: print(err_msg.decode())