diff --git a/cea/datamanagement/format_helper/__init__.py b/cea/datamanagement/format_helper/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cea/datamanagement/format_helper/cea4_migrate.py b/cea/datamanagement/format_helper/cea4_migrate.py new file mode 100644 index 000000000..96e4e4fe9 --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_migrate.py @@ -0,0 +1,417 @@ +""" +Mirgate the format of the input data to CEA-4 format after verification. + +""" + + +import os +import cea.config +import time +import pandas as pd +import geopandas as gpd + + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, verify_shp, \ + COLUMNS_ZONE_4, print_verification_results_4, path_to_input_file_without_db_4 +from cea.utilities.dbf import dbf_to_dataframe + +COLUMNS_ZONE_3 = ['Name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag'] +CSV_BUILDING_PROPERTIES_3 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems', 'typology'] + +COLUMNS_TYPOLOGY_3 = ['Name', 'YEAR', 'STANDARD', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] +COLUMNS_SURROUNDINGS_3 = ['Name', 'height_ag', 'floors_ag'] +COLUMNS_AIR_CONDITIONING_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_3 = ['Name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_3 = ['Name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 
'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_3 = ['Name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_3 = ['Name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] +columns_mapping_dict_name = {'Name': 'name'} +columns_mapping_dict_typology = {'YEAR': 'year', + 'STANDARD': 'const_type', + '1ST_USE': 'use_type1', + '1ST_USE_R': 'use_type1r', + '2ND_USE': 'use_type2', + '2ND_USE_R': 'use_type2r', + '3RD_USE': 'use_type3', + '3RD_USE_R': 'use_type3r' + } +COLUMNS_ZONE_TYPOLOGY_3 = ['Name', 'STANDARD', 'YEAR', '1ST_USE', '1ST_USE_R', '2ND_USE', '2ND_USE_R', '3RD_USE', '3RD_USE_R'] + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files for CEA-3 +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. +# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. 
+def path_to_input_file_without_db_3(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.dbf") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.dbf") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.dbf") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.dbf") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.dbf") + elif item == "typology": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "typology.dbf") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") + + return path_to_input_file + + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + + +def verify_name_duplicates_3(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). 
+
+    Returns:
+    list: A list of duplicate names, or an empty list if no duplicates are found.
+    """
+    # Construct the input file path
+    file_path = path_to_input_file_without_db_3(scenario, item)
+
+    # Check file type and load as a DataFrame
+    if file_path.endswith('.dbf'):
+        try:
+            df = dbf_to_dataframe(file_path)
+        except Exception as e:
+            raise ValueError(f"Error reading DBF file: {e}")
+    elif file_path.endswith('.shp'):
+        try:
+            df = gpd.read_file(file_path)
+        except Exception as e:
+            raise ValueError(f"Error reading shapefile: {e}")
+    else:
+        # This CEA-3 helper only handles .dbf and .shp inputs (fixed: message used to say .csv).
+        raise ValueError("Unsupported file type. Please provide a .dbf or .shp file.")
+
+    # Find duplicate names
+    list_names_duplicated = df['Name'][df['Name'].duplicated()].tolist()
+
+    return list_names_duplicated
+
+def verify_dbf_3(scenario, item, required_columns):
+    """
+    Verify if a DBF file contains all required columns.
+
+    Parameters:
+    scenario (str): Path or identifier for the scenario.
+    item (str): Identifier for the DBF file.
+    required_columns (list): List of column names to verify.
+
+    Returns:
+    A list of missing columns, or an empty list if all columns are present.
+    """
+    # Construct the DBF file path
+    dbf_path = path_to_input_file_without_db_3(scenario, item)
+
+    # Check if the DBF file exists (fixed: messages/comments used to say "CSV")
+    if not os.path.isfile(dbf_path):
+        raise FileNotFoundError(f"DBF file not found: {dbf_path}")
+
+    # Load the DBF file
+    try:
+        df = dbf_to_dataframe(dbf_path)
+    except Exception as e:
+        raise ValueError(f"Error reading DBF file: {e}")
+
+    # Get the column names from the DBF file
+    dbf_columns = df.columns.tolist()
+
+    # Check for missing columns
+    missing_columns = [col for col in required_columns if col not in dbf_columns]
+
+    return missing_columns
+
+
+def replace_shapefile_dbf(scenario, item, new_dataframe, list_attributes_3):
+    """
+    Replace the DBF file of a shapefile with the contents of a new DataFrame,
+    ensuring matching of `['Name']` in the shapefile and `['name']` in the new DataFrame. 
+ + :param shapefile_path: Path to the shapefile (without file extension). + :param new_dataframe: pandas DataFrame with the new data to replace the DBF file. + """ + # Load the original shapefile + shapefile_path = path_to_input_file_without_db_3(scenario, item) + gdf = gpd.read_file(shapefile_path) + + # Convert the DataFrame to a GeoDataFrame + new_gdf = gpd.GeoDataFrame(new_dataframe, geometry=gdf['geometry'], crs=gdf.crs) # Replace CRS with your specific CRS + + # Save the updated shapefile + new_gdf.to_file(shapefile_path, driver="ESRI Shapefile") + +def verify_file_exists_3(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. + items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db_3(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files + + +def migrate_dbf_to_csv(scenario, item, required_columns): + """ + Migrate a DBF file to CSV format with column renaming. 
+
+    Args:
+        scenario: The scenario path
+        item: The item name (e.g., 'air_conditioning')
+        required_columns: List of required columns
+    """
+    list_missing_columns = verify_dbf_3(scenario, item, required_columns)
+    if list_missing_columns:
+        print(f'+ Ensure column(s) are present in the {item}.dbf: {list_missing_columns}')
+    else:
+        # All required columns (including 'Name') are present here, so it is safe to
+        # check for duplicate names before migrating. (Removed an always-true
+        # `if 'Name' not in list_missing_columns` guard: the list is empty in this branch.)
+        list_names_duplicated = verify_name_duplicates_3(scenario, item)
+        if list_names_duplicated:
+            print(f'+ Ensure name(s) are unique in {item}.dbf: {list_names_duplicated} is duplicated.')
+        else:
+            # Rename CEA-3 column names to CEA-4, write the .csv, then drop the old .dbf.
+            df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, item))
+            df.rename(columns=columns_mapping_dict_name, inplace=True)
+            df.rename(columns=columns_mapping_dict_typology, inplace=True)
+            df.to_csv(path_to_input_file_without_db_4(scenario, item), index=False)
+            os.remove(path_to_input_file_without_db_3(scenario, item))
+            print(f'+ {item}.dbf has been migrated from CEA-3 to CEA-4 format.')
+
+## --------------------------------------------------------------------------------------------------------------------
+## Migrate to CEA-4 format from CEA-3 format
+## --------------------------------------------------------------------------------------------------------------------
+
+def migrate_cea3_to_cea4(scenario):
+
+    #0. 
verify if everything is already in the correct format for CEA-4 + dict_missing = cea4_verify(scenario) + if all(not value for value in dict_missing.values()): + pass + # print("✓" * 3) + # print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + # 'input building-geometries ShapeFiles: [zone and surroundings], ' + # 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4), + # ) + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + + else: + # Verify missing files for CEA-3 and CEA-4 formats + list_missing_files_shp_building_geometry_4 = dict_missing.get('building-geometry') + list_missing_files_dbf_building_properties_3 = verify_file_exists_3(scenario, CSV_BUILDING_PROPERTIES_3) + list_missing_files_csv_building_properties_4 = dict_missing.get('building-properties') + + # Verify missing attributes/columns for CEA-4 format + list_missing_attributes_zone_4 = dict_missing.get('zone') + list_missing_attributes_surroundings_4 = dict_missing.get('surroundings') + list_missing_columns_air_conditioning_4 = dict_missing.get('air_conditioning') + list_missing_columns_architecture_4 = dict_missing.get('architecture') + list_missing_columns_indoor_comfort_4 = dict_missing.get('indoor_comfort') + list_missing_columns_internal_loads_4 = dict_missing.get('internal_loads') + list_missing_columns_supply_systems_4 = dict_missing.get('supply_systems') + + #1. 
about zone.shp and surroundings.shp + if 'zone' not in list_missing_files_shp_building_geometry_4: + list_missing_attributes_zone_3 = verify_shp(scenario, 'zone', COLUMNS_ZONE_3) + if not list_missing_attributes_zone_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp follows the CEA-3 format.') + zone_df_3 = gpd.read_file(path_to_input_file_without_db_3(scenario, 'zone')) + zone_df_3.rename(columns=columns_mapping_dict_name, inplace=True) + if 'typology' not in list_missing_files_dbf_building_properties_3: + list_missing_attributes_typology_3 = verify_dbf_3(scenario, 'typology', COLUMNS_TYPOLOGY_3) + if not list_missing_attributes_typology_3 and list_missing_attributes_zone_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'typology.shp follows the CEA-3 format.') + typology_df = dbf_to_dataframe(path_to_input_file_without_db_3(scenario, 'typology')) + typology_df = typology_df[COLUMNS_ZONE_TYPOLOGY_3] + typology_df.rename(columns=columns_mapping_dict_typology, inplace=True) + zone_df_4 = pd.merge(zone_df_3, typology_df, left_on=['name'], right_on=["Name"], how='left') + zone_df_4.drop(columns=['Name'], inplace=True) + zone_df_4 = zone_df_4[COLUMNS_ZONE_4] + replace_shapefile_dbf(scenario, 'zone', zone_df_4, COLUMNS_ZONE_3) + print('+ zone.shp and typology.dbf have been merged and migrated to CEA-4 format.') + else: + raise ValueError('+ typology.dbf exists but does not follow the CEA-3 format. CEA cannot proceed with the data migration. ' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_typology_3}'.format(list_missing_attributes_typology_3=list_missing_attributes_typology_3) + ) + else: + print("+ CEA is unable to produce a zone.shp compatible to CEA-4 format. 
To enable the migration, ensure typology.dbf is present in building-properties folder for CEA-3 format.") + + elif list_missing_attributes_zone_3 and not list_missing_attributes_zone_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'zone.shp already follows the CEA-4 format.') + else: + raise ValueError('+ zone.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' + 'Check the following column(s) for CEA-3 format: {list_missing_attributes_zone_3}.'.format(list_missing_attributes_zone_3=list_missing_attributes_zone_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_zone_4}.'.format(list_missing_attributes_zone_4=list_missing_attributes_zone_4) + ) + else: + print("+ Ensure zone.shp is present in building-geometry folder.") + + if 'surroundings' not in list_missing_files_shp_building_geometry_4: + list_missing_attributes_surroundings_3 = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_3) + if not list_missing_attributes_surroundings_3 and list_missing_attributes_surroundings_4: + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp follows the CEA-3 format.') + surroundings_df = gpd.read_file(path_to_input_file_without_db_3(scenario, 'surroundings')) + surroundings_df.rename(columns=columns_mapping_dict_name, inplace=True) + replace_shapefile_dbf(scenario, 'surroundings', surroundings_df, COLUMNS_SURROUNDINGS_3) + print('+ surroundings.shp has been migrated from CEA-3 to CEA-4 format.') + + elif list_missing_attributes_surroundings_3 and not list_missing_attributes_surroundings_4: + pass + # print('For Scenario: {scenario}, '.format(scenario=scenario_name), 'surroundings.shp already follows the CEA-4 format.') + else: + raise ValueError('surroundings.shp exists but follows neither the CEA-3 nor CEA-4 format. CEA cannot proceed with the data migration.' 
+ 'Check the following column(s) for CEA-3 format: {list_missing_attributes_surroundings_3}.'.format(list_missing_attributes_surroundings_3=list_missing_attributes_surroundings_3), + 'Check the following column(s) for CEA-4 format: {list_missing_attributes_surroundings_4}.'.format(list_missing_attributes_surroundings_4=list_missing_attributes_surroundings_4) + ) + else: + print('+ (optional) Run Surroundings-Helper to generate surroundings.shp.') + + #2. about the .dbf files in the building-properties folder to be migrated to .csv files + if 'air_conditioning' in list_missing_files_csv_building_properties_4 and not list_missing_columns_air_conditioning_4: + if 'air_conditioning' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'air_conditioning', COLUMNS_AIR_CONDITIONING_3) + else: + print("+ Ensure either air_conditioning.dbf or air_conditioning.csv is present in building-properties folder. Run Archetypes-Helper to generate air_conditioning.csv.") + elif 'air_conditioning' not in list_missing_files_csv_building_properties_4 and list_missing_columns_air_conditioning_4: + print('+ Ensure column(s) are present in air_conditioning.csv: {list_missing_columns_air_conditioning_4}.'.format(list_missing_columns_air_conditioning_4=list_missing_columns_air_conditioning_4)) + else: + pass + + if 'architecture' in list_missing_files_csv_building_properties_4 and not list_missing_columns_architecture_4: + if 'architecture' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'architecture', COLUMNS_ARCHITECTURE_3) + else: + print("+ Ensure either architecture.dbf or architecture.csv is present in building-properties folder. 
Run Archetypes-Helper to generate architecture.csv.") + elif 'architecture' not in list_missing_files_csv_building_properties_4 and list_missing_columns_architecture_4: + print('Ensure column(s) are present in architecture.csv: {list_missing_columns_architecture_4}.'.format(list_missing_columns_architecture_4=list_missing_columns_architecture_4)) + else: + pass + + if 'indoor_comfort' in list_missing_files_csv_building_properties_4 and not list_missing_columns_indoor_comfort_4: + if 'indoor_comfort' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'indoor_comfort', COLUMNS_INDOOR_COMFORT_3) + else: + print("+ Ensure either indoor_comfort.dbf or indoor_comfort.csv is present in building-properties folder. Run Archetypes-Helper to generate indoor_comfort.csv.") + elif 'indoor_comfort' not in list_missing_files_csv_building_properties_4 and list_missing_columns_indoor_comfort_4: + print('+ Ensure column(s) are present in indoor_comfort.csv: {list_missing_columns_indoor_comfort_4}.'.format(list_missing_columns_indoor_comfort_4=list_missing_columns_indoor_comfort_4)) + else: + pass + + if 'internal_loads' in list_missing_files_csv_building_properties_4 and not list_missing_columns_internal_loads_4: + if 'internal_loads' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'internal_loads', COLUMNS_INTERNAL_LOADS_3) + else: + print("+ Ensure either internal_loads.dbf or internal_loads.csv is present in building-properties folder. 
Run Archetypes-Helper to generate internal_loads.csv.") + elif 'internal_loads' not in list_missing_files_csv_building_properties_4 and list_missing_columns_internal_loads_4: + print('+ Ensure column(s) are present in internal_loads.csv: {list_missing_columns_internal_loads_4}.'.format(list_missing_columns_internal_loads_4=list_missing_columns_internal_loads_4)) + else: + pass + + if 'supply_systems' in list_missing_files_csv_building_properties_4 and not list_missing_columns_supply_systems_4: + if 'supply_systems' not in list_missing_files_dbf_building_properties_3: + migrate_dbf_to_csv(scenario, 'supply_systems', COLUMNS_SUPPLY_SYSTEMS_3) + else: + print("+ Ensure either supply_systems.dbf or supply_systems.csv is present in building-properties folder. Run Archetypes-Helper to generate supply_systems.csv.") + elif 'supply_systems' not in list_missing_files_csv_building_properties_4 and list_missing_columns_supply_systems_4: + print('+ Ensure column(s) are present in supply_systems.csv: {list_missing_columns_supply_system_4}.'.format(list_missing_columns_supply_system_4=list_missing_columns_supply_systems_4)) + else: + pass + + if 'typology' not in list_missing_files_dbf_building_properties_3: + typology_path = path_to_input_file_without_db_3(scenario, 'typology') + if os.path.exists(typology_path): + os.remove(typology_path) + print('+ typology.dbf has been removed as it is no longer needed by CEA-4.') + + + #3. 
about the Database + + + # Print: End + print("-" * 60) + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + scenario = config.scenario + scenario_name = os.path.basename(scenario) + + # Print: Start + div_len = 37 - len(scenario_name) + print('-' * 50) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + + # Execute the verification + migrate_cea3_to_cea4(scenario) + + # Execute the verification again + dict_missing = cea4_verify(scenario) + + # Print the verification results + print_verification_results_4(scenario_name, dict_missing) + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 60) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('The entire process of data migration from CEA-3 to CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/cea4_verify.py b/cea/datamanagement/format_helper/cea4_verify.py new file mode 100644 index 000000000..19861a6aa --- /dev/null +++ b/cea/datamanagement/format_helper/cea4_verify.py @@ -0,0 +1,373 @@ +""" +Verify the format of the input data for CEA-4 model. 
+ +""" + +import os +import cea.config +import time +import geopandas as gpd +import pandas as pd + + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + + +SHAPEFILES = ['zone', 'surroundings'] +COLUMNS_ZONE_4 = ['name', 'floors_bg', 'floors_ag', 'height_bg', 'height_ag', + 'year', 'const_type', 'use_type1', 'use_type1r', 'use_type2', 'use_type2r', 'use_type3', 'use_type3r'] +COLUMNS_SURROUNDINGS_4 = ['name', 'height_ag', 'floors_ag'] +CSV_BUILDING_PROPERTIES_4 = ['air_conditioning', 'architecture', 'indoor_comfort', 'internal_loads', 'supply_systems'] +COLUMNS_AIR_CONDITIONING_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_ctrl', 'type_vent', + 'heat_starts', 'heat_ends', 'cool_starts', 'cool_ends'] +COLUMNS_ARCHITECTURE_4 = ['name', + 'Hs_ag', 'Hs_bg', 'Ns', 'Es', 'void_deck', 'wwr_north', 'wwr_west', 'wwr_east', 'wwr_south', + 'type_cons', 'type_leak', 'type_floor', 'type_part', 'type_base', 'type_roof', 'type_wall', + 'type_win', 'type_shade'] +COLUMNS_INDOOR_COMFORT_4 = ['name', + 'Tcs_set_C', 'Ths_set_C', 'Tcs_setb_C', 'Ths_setb_C', 'Ve_lsp', 'RH_min_pc', 'RH_max_pc'] +COLUMNS_INTERNAL_LOADS_4 = ['name', + 'Occ_m2p', 'Qs_Wp', 'X_ghp', 'Ea_Wm2', 'El_Wm2', 'Ed_Wm2', 'Ev_kWveh', 'Qcre_Wm2', + 'Vww_ldp', 'Vw_ldp', 'Qhpro_Wm2', 'Qcpro_Wm2', 'Epro_Wm2'] +COLUMNS_SUPPLY_SYSTEMS_4 = ['name', + 'type_cs', 'type_hs', 'type_dhw', 'type_el'] + +## -------------------------------------------------------------------------------------------------------------------- +## The paths to the input files for CEA-4 +## -------------------------------------------------------------------------------------------------------------------- + +# The paths are relatively hardcoded for now without using the inputlocator script. 
+# This is because we want to iterate over all scenarios, which is currently not possible with the inputlocator script. +def path_to_input_file_without_db_4(scenario, item): + + if item == "zone": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "zone.shp") + elif item == "surroundings": + path_to_input_file = os.path.join(scenario, "inputs", "building-geometry", "surroundings.shp") + elif item == "air_conditioning": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "air_conditioning.csv") + elif item == "architecture": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "architecture.csv") + elif item == "indoor_comfort": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "indoor_comfort.csv") + elif item == "internal_loads": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "internal_loads.csv") + elif item == "supply_systems": + path_to_input_file = os.path.join(scenario, "inputs", "building-properties", "supply_systems.csv") + elif item == 'streets': + path_to_input_file = os.path.join(scenario, "inputs", "networks", "streets.shp") + elif item == 'terrain': + path_to_input_file = os.path.join(scenario, "inputs", "topography", "terrain.tif") + elif item == 'weather': + path_to_input_file = os.path.join(scenario, "inputs", "weather", "weather.epw") + else: + raise ValueError(f"Unknown item {item}") + + return path_to_input_file + + +## -------------------------------------------------------------------------------------------------------------------- +## Helper functions +## -------------------------------------------------------------------------------------------------------------------- + +def verify_shp(scenario, item, required_attributes): + """ + Verify if a shapefile contains all required attributes. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Either "zone" or "surroundings". 
+ required_attributes (list): List of attribute names to verify. + + Returns: + A list of missing attributes, or an empty list if all attributes are present. + """ + # Construct the shapefile path + shapefile_path = path_to_input_file_without_db_4(scenario, item) + + # Check if the shapefile exists + if not os.path.isfile(shapefile_path): + raise FileNotFoundError(f"Shapefile not found: {shapefile_path}") + + # Load the shapefile + try: + gdf = gpd.read_file(shapefile_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + + # Get the column names from the shapefile's attribute table + shapefile_columns = gdf.columns.tolist() + + # Check for missing attributes + missing_attributes = [attr for attr in required_attributes if attr not in shapefile_columns] + + return missing_attributes + + +def verify_csv_4(scenario, item, required_columns): + """ + Verify if a CSV file contains all required columns. + + Parameters: + scenario (str): Path or identifier for the scenario. + item (str): Identifier for the CSV file. + required_columns (list): List of column names to verify. + + Returns: + A list of missing columns, or an empty list if all columns are present. + """ + # Construct the CSV file path + csv_path = path_to_input_file_without_db_4(scenario, item) + + # Check if the CSV file exists + if not os.path.isfile(csv_path): + raise FileNotFoundError(f"CSV file not found: {csv_path}") + + # Load the CSV file + try: + df = pd.read_csv(csv_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + + # Get the column names from the CSV file + csv_columns = df.columns.tolist() + + # Check for missing columns + missing_columns = [col for col in required_columns if col not in csv_columns] + + return missing_columns + + +def verify_file_exists_4(scenario, items): + """ + Verify if the files in the provided list exist for a given scenario. + + Parameters: + scenario (str): Path or identifier for the scenario. 
+ items (list): List of file identifiers to check. + + Returns: + list: A list of missing file identifiers, or an empty list if all files exist. + """ + list_missing_files = [] + for file in items: + path = path_to_input_file_without_db_4(scenario, file) + if not os.path.isfile(path): + list_missing_files.append(file) + return list_missing_files + + +def verify_name_duplicates_4(scenario, item): + """ + Verify if there are duplicate names in the 'name' column of a .csv or .shp file. + + Parameters: + file_path (str): Path to the input file (either .csv or .shp). + + Returns: + list: A list of duplicate names, or an empty list if no duplicates are found. + """ + # Construct the CSV file path + file_path = path_to_input_file_without_db_4(scenario, item) + + # Check file type and load as a DataFrame + if file_path.endswith('.csv'): + try: + df = pd.read_csv(file_path) + except Exception as e: + raise ValueError(f"Error reading CSV file: {e}") + elif file_path.endswith('.shp'): + try: + df = gpd.read_file(file_path) + except Exception as e: + raise ValueError(f"Error reading shapefile: {e}") + else: + raise ValueError("Unsupported file type. Please provide a .csv or .shp file.") + + # Find duplicate names + list_names_duplicated = df['name'][df['name'].duplicated()].tolist() + + return list_names_duplicated + + +def print_verification_results_4(scenario_name, dict_missing): + + if all(not value for value in dict_missing.values()): + print("✓" * 3) + print('All inputs are verified as present and compatible with the current version of CEA-4 for Scenario: {scenario}, including:'.format(scenario=scenario_name), + 'input building-geometries ShapeFiles: [zone and surroundings], ' + 'input building-properties .csv files: {csv_building_properties}.'.format(csv_building_properties=CSV_BUILDING_PROPERTIES_4) + ) + else: + print("!" * 3) + print('All or some of input data files/columns are missing or incompatible with the current version of CEA-4 for Scenario: {scenario}. 
'.format(scenario=scenario_name), + 'If you are migrating your input data from CEA-3 to CEA-4 format, set the toggle `migrate_from_cea_3` to `True` and run the script again. ' + 'If you manually prepared the input data, check the log for missing files and/or incompatible columns. Modify your input data according to the log above.' + ) + + +def verify_csv_file(scenario, item, required_columns, print_results=False): + """ + Verify a CSV file's columns and name uniqueness. + + Args: + scenario: The scenario path + item: The item name (e.g., 'air_conditioning') + required_columns: List of required columns + print_results: Whether to print verification results + + Returns: + list: List of missing columns + """ + list_missing_columns = verify_csv_4(scenario, item, required_columns) + if list_missing_columns: + if print_results: + print(f'+ Ensure column(s) are present in the {item}.csv: {list_missing_columns}') + else: + if 'name' not in list_missing_columns: + list_names_duplicated = verify_name_duplicates_4(scenario, item) + if list_names_duplicated and print_results: + print(f'Ensure name(s) are unique in {item}.csv: {list_names_duplicated} is duplicated.') + return list_missing_columns + +## -------------------------------------------------------------------------------------------------------------------- +## Unique traits for the CEA-4 format +## -------------------------------------------------------------------------------------------------------------------- + +def cea4_verify(scenario, print_results=False): + + #1. 
about zone.shp and surroundings.shp + list_missing_attributes_zone = [] + list_missing_attributes_surroundings = [] + list_missing_files_shp_building_geometry = verify_file_exists_4(scenario, SHAPEFILES) + + if 'zone' not in list_missing_files_shp_building_geometry: + list_missing_attributes_zone = verify_shp(scenario, 'zone', COLUMNS_ZONE_4) + if list_missing_attributes_zone: + if print_results: + print('+ Ensure attribute(s) are present in zone.shp: {missing_attributes_zone}'.format(missing_attributes_zone=list_missing_attributes_zone)) + if 'name' not in list_missing_attributes_zone: + list_names_duplicated = verify_name_duplicates_4(scenario, 'zone') + if list_names_duplicated: + if print_results: + print('+ Ensure name(s) are unique in zone.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + if 'surroundings' not in list_missing_files_shp_building_geometry: + list_missing_attributes_surroundings = verify_shp(scenario, 'surroundings', COLUMNS_SURROUNDINGS_4) + if list_missing_attributes_surroundings: + if print_results: + print('+ Ensure attribute(s) are present in surroundings.shp: {missing_attributes_surroundings}'.format(missing_attributes_surroundings=list_missing_attributes_surroundings)) + if 'name' not in list_missing_attributes_surroundings: + list_names_duplicated = verify_name_duplicates_4(scenario, 'surroundings') + if list_names_duplicated: + if print_results: + print('+ Ensure name(s) are unique in surroundings.shp: {list_names_duplicated} is duplicated.'.format(list_names_duplicated=list_names_duplicated)) + + #2. 
about .csv files under the "inputs/building-properties" folder + list_missing_columns_air_conditioning = [] + list_missing_columns_architecture = [] + list_missing_columns_indoor_comfort = [] + list_missing_columns_internal_loads = [] + list_missing_columns_supply_systems = [] + + list_missing_files_csv_building_properties = verify_file_exists_4(scenario, CSV_BUILDING_PROPERTIES_4) + if list_missing_files_csv_building_properties: + if print_results: + print('+ Ensure .csv file(s) are present in the building-properties folder: {missing_files_csv_building_properties}'.format(missing_files_csv_building_properties=list_missing_files_csv_building_properties)) + + for item, columns in [ + ('air_conditioning', COLUMNS_AIR_CONDITIONING_4), + ('architecture', COLUMNS_ARCHITECTURE_4), + ('indoor_comfort', COLUMNS_INDOOR_COMFORT_4), + ('internal_loads', COLUMNS_INTERNAL_LOADS_4), + ('supply_systems', COLUMNS_SUPPLY_SYSTEMS_4) + ]: + if item not in list_missing_files_csv_building_properties: + locals()[f'list_missing_columns_{item}'] = verify_csv_file( + scenario, item, columns, print_results=print_results + ) + #3. verify if terrain.tif, weather.epw and streets.shp exist + list_missing_files_terrain = verify_file_exists_4(scenario, ['terrain']) + if list_missing_files_terrain: + if print_results: + print('+ Ensure terrain.tif are present in the typography folder. Consider running Terrain Helper under Data Management.') + + list_missing_files_weather = verify_file_exists_4(scenario, ['weather']) + if list_missing_files_weather: + if print_results: + print('+ Ensure weather.epw are present in the typography folder. Consider running Weather Helper under Data Management.') + + list_missing_files_streets = verify_file_exists_4(scenario, ['streets']) + if list_missing_files_streets: + if print_results: + print('+ Ensure streets.shp are present in the typography folder. Consider running Streets Helper under Data Management, if Thermal-Networks analysis is required.') + + #4. 
verify the DB under the "inputs/technology/" folder + list_missing_files_db = [] + + # Compile the results + dict_missing = { + 'building-geometry': list_missing_files_shp_building_geometry, + 'zone': list_missing_attributes_zone, + 'surroundings': list_missing_attributes_surroundings, + 'building-properties': list_missing_files_csv_building_properties, + 'air_conditioning': list_missing_columns_air_conditioning, + 'architecture': list_missing_columns_architecture, + 'indoor_comfort': list_missing_columns_indoor_comfort, + 'internal_loads': list_missing_columns_internal_loads, + 'supply_systems': list_missing_columns_supply_systems, + 'terrain': list_missing_files_terrain, + 'weather': list_missing_files_weather, + 'streets': list_missing_files_streets, + 'db': list_missing_files_db + } + + # Print: End + if print_results: + print("-" * 50) + + return dict_missing + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +## -------------------------------------------------------------------------------------------------------------------- + + +def main(config): + # Start the timer + t0 = time.perf_counter() + assert os.path.exists(config.general.project), 'input file not found: %s' % config.project + + # Get the scenario name + scenario = config.scenario + scenario_name = os.path.basename(scenario) + + # Print: Start + div_len = 37 - len(scenario_name) + print('+' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario_name) + "-" * div_len) + + # Execute the verification + dict_missing = cea4_verify(scenario, print_results=True) + + # Print the results + print_verification_results_4(scenario_name, dict_missing) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + + # Print: End + # print("-" * 1 + ' Scenario: {scenario} - end '.format(scenario=scenario_name) + "-" * 50) + print('+' * 60) + print('The entire process of 
CEA-4 format verification is now completed - time elapsed: %.2f seconds' % time_elapsed) + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/datamanagement/format_helper/format_helper.py b/cea/datamanagement/format_helper/format_helper.py new file mode 100644 index 000000000..5b8fe7da9 --- /dev/null +++ b/cea/datamanagement/format_helper/format_helper.py @@ -0,0 +1,86 @@ +""" +Main script of the format helper that activates the verification and migration as needed. + +""" + +import os +import cea.config +import time +from cea.datamanagement.format_helper.cea4_verify import cea4_verify, print_verification_results_4 +from cea.datamanagement.format_helper.cea4_migrate import migrate_cea3_to_cea4 + +__author__ = "Zhongming Shi" +__copyright__ = "Copyright 2025, Architecture and Building Systems - ETH Zurich" +__credits__ = ["Zhongming Shi"] +__license__ = "MIT" +__version__ = "0.1" +__maintainer__ = "Reynold Mok" +__email__ = "cea@arch.ethz.ch" +__status__ = "Production" + + +def exec_cea_format_helper(config, cea_scenario): + # auto-migrate from CEA-3 to CEA-4 + bool_migrate = config.format_helper.migrate_from_cea_3 + scenario_name = os.path.basename(cea_scenario) + + if not bool_migrate: + # subprocess.run(['cea', 'cea4_verify', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) + dict_missing = cea4_verify(cea_scenario, print_results=True) + print_verification_results_4(scenario_name, dict_missing) + + else: + # subprocess.run(['cea', 'cea4_migrate', '--scenario', cea_scenario], env=my_env, check=True, capture_output=True) + migrate_cea3_to_cea4(cea_scenario) + dict_missing = cea4_verify(cea_scenario) + print_verification_results_4(scenario_name, dict_missing) + + +## -------------------------------------------------------------------------------------------------------------------- +## Main function +##
-------------------------------------------------------------------------------------------------------------------- + +def main(config): + """ + Batch processing all selected scenarios under a project. + + :param config: the configuration object to use + :type config: cea.config.Configuration + :return: + """ + + # Start the timer + t0 = time.perf_counter() + + assert os.path.exists(config.general.project), 'input file not found: %s' % config.general.project + + project_path = config.general.project + scenarios_list = config.format_helper.scenarios_to_verify_and_migrate + + print('+' * 60) + print(f'Format Helper is batch-processing the data verification and migration for Scenarios: {scenarios_list}.') + + # Loop over one or all selected scenarios under the project + for scenario in scenarios_list: + # Ignore hidden directories + if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): + continue + + # Print: Start + div_len = 37 - len(scenario) + print('+' * 60) + print("-" * 1 + ' Scenario: {scenario} '.format(scenario=scenario) + "-" * div_len) + + cea_scenario = os.path.join(project_path, scenario) + # executing CEA commands + exec_cea_format_helper(config, cea_scenario) + + # Print the time used for the entire processing + time_elapsed = time.perf_counter() - t0 + print('+' * 60) + print( + 'The entire batch processing of data format verification (and migration) for CEA-4 is now completed - time elapsed: %.2f seconds' % time_elapsed) + + +if __name__ == '__main__': + main(cea.config.Configuration()) diff --git a/cea/default.config b/cea/default.config index 7a61de74b..d4644f451 100644 --- a/cea/default.config +++ b/cea/default.config @@ -360,9 +360,9 @@ operational.type = BooleanParameter operational.help = Estimate Operational costs (variable and fixed) due to supply systems operation (allocated according to the supply-systems input database) [emissions] -year-to-calculate = 2024 +year-to-calculate = 2025 year-to-calculate.type = 
IntegerParameter -year-to-calculate.help = Defines the time period (year-to-calculate - year_built or year_renovated) over which embodied emissions are calculated. If greater than 60 years, embodied emissions are not considered. +year-to-calculate.help = Defines the year to calculate the emissions. If a building is older than 60 years, embodied emissions are not considered. embodied = true embodied.type = BooleanParameter @@ -610,11 +610,19 @@ variable-5-upper-bound.type = RealParameter variable-5-upper-bound.help = Upper bound of Variable 5. variable-5-upper-bound.nullable = true +[format-helper] +scenarios-to-verify-and-migrate = +scenarios-to-verify-and-migrate.type = ScenarioNameMultiChoiceParameter +scenarios-to-verify-and-migrate.help = Select the Scenario(s) to verify (and migrate). Leave blank to select all Scenarios. + +migrate-from-cea-3 = true +migrate-from-cea-3.type = BooleanParameter +migrate-from-cea-3.help = True to auto-migrate the input format to the current version of CEA-4 from CEA-3. Note the process of migration is not reversible. [batch-process-workflow] scenarios-to-simulate = scenarios-to-simulate.type = ScenarioNameMultiChoiceParameter -scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all scenarios. Consider excluding the reference-Scenario when import-from-rhino-gh is set to True. +scenarios-to-simulate.help = Select the Scenario(s) to simulate. Leave blank to simulate all Scenarios. Exclude the reference-Scenario when import-from-rhino-gh is set to True. export-to-rhino-gh = false export-to-rhino-gh.type = BooleanParameter diff --git a/cea/scripts.yml b/cea/scripts.yml index 964d870c0..fe5ad1993 100644 --- a/cea/scripts.yml +++ b/cea/scripts.yml @@ -357,6 +357,34 @@ Data Management: Utilities: + - name: cea4-format-helper + label: CEA-4 Format Helper + description: | + Verifies the inputs are in the correct format for CEA-4.
+ Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + + interfaces: [cli, dashboard] + module: cea.datamanagement.format_helper.format_helper + parameters: ['general:scenario', format-helper] + + - name: cea4-migrate + label: CEA-4 Format Helper - Migrate + description: | + Migrates Late-CEA-3 inputs data - Note the process of migration is not reversible. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_migrate + parameters: ['general:scenario'] + + - name: cea4-verify + label: CEA-4 Format Helper - Verify + description: | + Verifies the inputs are in the correct format for CEA-4. + + interfaces: [cli] + module: cea.datamanagement.format_helper.cea4_verify + parameters: ['general:scenario'] + - name: sensitivity-analysis-sampler label: Generate Samples for Sensitivity Analysis (SA) description: Generate samples for sensitivity analysis using Sobol Method. diff --git a/cea/utilities/batch_process_workflow.py b/cea/utilities/batch_process_workflow.py index 9e2dc7862..95523e246 100644 --- a/cea/utilities/batch_process_workflow.py +++ b/cea/utilities/batch_process_workflow.py @@ -156,7 +156,7 @@ def exec_cea_commands(config, cea_scenario): def main(config): """ - Batch processing all scenarios under a project. + Batch processing all selected scenarios under a project.
:param config: the configuration object to use :type config: cea.config.Configuration @@ -172,7 +172,7 @@ def main(config): scenario_name = config.general.scenario_name scenarios_list = config.batch_process_workflow.scenarios_to_simulate - # Loop over one or all scenarios under the project + # Loop over one or all selected scenarios under the project for scenario in scenarios_list: # Ignore hidden directories if scenario.startswith('.') or os.path.isfile(os.path.join(project_path, scenario)): @@ -184,7 +184,7 @@ def main(config): # executing CEA commands exec_cea_commands(config, cea_scenario) except subprocess.CalledProcessError as e: - print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}") + print(f"CEA simulation for scenario `{scenario_name}` failed at script: {e.cmd[1]}.") err_msg = e.stderr if err_msg is not None: print(err_msg.decode())