From 9c80915a26ebf31676a05790183fcef94acdca5b Mon Sep 17 00:00:00 2001
From: Chen Wang <cwang138@illinois.edu>
Date: Tue, 14 May 2024 16:46:16 -0500
Subject: [PATCH 1/4] add repair time analysis

---
 pyincore/analyses/buildingrepair/__init__.py  |   8 +
 .../analyses/buildingrepair/buildingrepair.py | 165 ++++++++++++++++++
 2 files changed, 173 insertions(+)
 create mode 100644 pyincore/analyses/buildingrepair/__init__.py
 create mode 100644 pyincore/analyses/buildingrepair/buildingrepair.py

diff --git a/pyincore/analyses/buildingrepair/__init__.py b/pyincore/analyses/buildingrepair/__init__.py
new file mode 100644
index 000000000..a8b85813a
--- /dev/null
+++ b/pyincore/analyses/buildingrepair/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) 2019 University of Illinois and others. All rights reserved.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Mozilla Public License v2.0 which accompanies this distribution,
+# and is available at https://www.mozilla.org/en-US/MPL/2.0/
+
+
+from pyincore.analyses.buildingrepair.buildingrepair import BuildingRepair
diff --git a/pyincore/analyses/buildingrepair/buildingrepair.py b/pyincore/analyses/buildingrepair/buildingrepair.py
new file mode 100644
index 000000000..15ec3fef7
--- /dev/null
+++ b/pyincore/analyses/buildingrepair/buildingrepair.py
@@ -0,0 +1,165 @@
+# Copyright (c) 2019 University of Illinois and others. All rights reserved.
+#
+# This program and the accompanying materials are made available under the
+# terms of the Mozilla Public License v2.0 which accompanies this distribution,
+# and is available at https://www.mozilla.org/en-US/MPL/2.0/
+
+import numpy as np
+import pandas as pd
+
+from pyincore import BaseAnalysis, RepairService
+from pyincore.analyses.buildingdamage.buildingutil import BuildingUtil
+
+
+class BuildingRepair(BaseAnalysis):
+    """
+    This analysis computes the repair time needed for each building from any damage state. The repair model follows
+    the FEMA P-58 approach and is controlled by fragility functions.
+
+    The output of this analysis is a CSV file with repair time for each simulated damage state at the building level.
+
+    Contributors
+        | Science: Wanting Lisa Wang, John W. van de Lindt
+        | Implementation: NCSA IN-CORE Dev Team
+
+    Related publications
+        Wang, Wanting Lisa, and John W. van de Lindt. "Quantitative Modeling of Residential Building Disaster Recovery
+        and Effects of Pre-and Post-event Policies." International Journal of Disaster Risk Reduction (2021): 102259.
+
+    Args:
+        incore_client (IncoreClient): Service authentication.
+
+    """
+
+    def __init__(self, incore_client):
+        self.repairsvc = RepairService(incore_client)
+
+        super(BuildingRepair, self).__init__(incore_client)
+
+    def run(self):
+        """Executes the building repair analysis.
+
+        Returns:
+            bool: True if successful, False otherwise.
+
+        """
+        result_name = self.get_parameter("result_name")
+
+        buildings = self.get_input_dataset("buildings").get_inventory_reader()
+        buildings = list(buildings)
+        sample_damage_states = self.get_input_dataset("sample_damage_states").get_dataframe_from_csv(low_memory=False)
+
+        # Returns dataframe
+        repair_results = self.recovery_rate(buildings, sample_damage_states)
+        self.set_result_csv_data("building_repair", repair_results, result_name, "dataframe")
+
+        return True
+
+    def recovery_rate(self, buildings, sample_damage_states):
+        """ Gets repair time required for each building.
+
+        Args:
+            buildings (list): List of buildings
+            sample_damage_states (pd.DataFrame): Samples' damage states
+
+        Returns:
+            pd.DataFrame: Repair time of all buildings for each sample
+        """
+        seed = self.get_parameter("seed")
+        if seed is not None:
+            np.random.seed(seed)
+
+        repair_key = self.get_parameter("repair_key")
+        if repair_key is None:
+            repair_key = BuildingUtil.DEFAULT_REPAIR_KEY
+            self.set_parameter("repair_key", repair_key)
+        repair_sets = self.repairsvc.match_inventory(self.get_input_dataset("dfr3_mapping_set"), buildings, repair_key)
+        repair_sets_by_guid = {}  # get repair sets by guid so they can be mapped with output of monte carlo
+
+        # This is sort of a workaround until we define Repair Curve models and abstract this out there
+        for i, b in enumerate(buildings):
+            repair_sets_by_guid[b["properties"]['guid']] = repair_sets[str(i)]
+
+        for index, row in sample_damage_states.iterrows():
+            # Obtain the damage states
+            mapped_repair = repair_sets_by_guid[row['guid']]
+            samples_mcs = row['sample_damage_states'].split(",")
+            num_samples = len(samples_mcs)
+
+            # Use a lambda to obtain the damage state in numeric form. Note that since damage states are single digits,
+            # it suffices to look at the last character and convert into an integer value. Do this computation once
+            # per building only.
+            samples_mcs_ds = list(map(lambda x: int(x[-1]), samples_mcs))
+
+            # Now, perform the two nested loops, using the indexing function to simplify the syntax.
+            for i in range(0, num_samples):
+                state = samples_mcs_ds[i]
+
+                percent_func = np.random.random(num_samples)
+                # NOTE: Even though the kwarg name is "repair_time", it actually takes percent of functionality. DFR3
+                # system currently doesn't have a way to represent the name correctly when calculating the inverse.
+                repair_time = mapped_repair.repair_curves[state].solve_curve_for_inverse(
+                    hazard_values={}, curve_parameters=mapped_repair.curve_parameters, **{"repair_time": percent_func}
+                ) / 7
+
+        return repair_time
+
+    def get_spec(self):
+        """Get specifications of the residential building recovery analysis.
+
+        Returns:
+            obj: A JSON object of specifications of the building repair analysis.
+
+        """
+        return {
+            'name': 'building repair',
+            'description': 'calculate building repair time',
+            'input_parameters': [
+                {
+                    'id': 'result_name',
+                    'required': True,
+                    'description': 'name of the result',
+                    'type': str
+                },
+                {
+                    'id': 'repair_key',
+                    'required': False,
+                    'description': 'Repair key to use in mapping dataset',
+                    'type': str
+                },
+                {
+                    'id': 'seed',
+                    'required': False,
+                    'description': 'Initial seed for the probabilistic model',
+                    'type': int
+                }
+            ],
+            'input_datasets': [
+                {
+                    'id': 'buildings',
+                    'required': True,
+                    'description': 'Building Inventory',
+                    'type': ['ergo:buildingInventoryVer4', 'ergo:buildingInventoryVer5', 'ergo:buildingInventoryVer6',
+                             'ergo:buildingInventoryVer7']
+                },
+                {
+                    'id': 'dfr3_mapping_set',
+                    'required': True,
+                    'description': 'DFR3 Mapping Set Object',
+                    'type': ['incore:dfr3MappingSet'],
+                },
+                {
+                    'id': 'sample_damage_states',
+                    'required': True,
+                    'description': 'Sample damage states',
+                    'type': ['incore:sampleDamageState']
+                },
+            ],
+            'output_datasets': [
+                {
+                    'id': 'repair_time',
+                    'description': 'CSV file of building repair times',
+                    'type': 'incore:buildingRepairTime'
+                }
+            ]
+        }

From 1c9a2411feaf5331d1bb22edbf1fb97ec5f5aedf Mon Sep 17 00:00:00 2001
From: Chen Wang <cwang138@illinois.edu>
Date: Mon, 20 May 2024 14:46:26 -0500
Subject: [PATCH 2/4] repair temp

---
 .../analyses/buildingrepair/buildingrepair.py | 16 ++++++--
 .../buildingrepair/test_buildingrepair.py     | 41 +++++++++++++++++++
 2 files changed, 53 insertions(+), 4 deletions(-)
 create mode 100644 tests/pyincore/analyses/buildingrepair/test_buildingrepair.py

diff --git a/pyincore/analyses/buildingrepair/buildingrepair.py b/pyincore/analyses/buildingrepair/buildingrepair.py
index 15ec3fef7..92fee42db 100644
--- a/pyincore/analyses/buildingrepair/buildingrepair.py
+++ b/pyincore/analyses/buildingrepair/buildingrepair.py
@@ -51,7 +51,7 @@ def run(self):
 
         # Returns dataframe
         repair_results = self.recovery_rate(buildings, sample_damage_states)
-        self.set_result_csv_data("building_repair", repair_results, result_name, "dataframe")
+        self.set_result_csv_data("repair_time", repair_results, result_name, "dataframe")
 
         return True
 
@@ -69,6 +69,8 @@ def recovery_rate(self, buildings, sample_damage_states):
         if seed is not None:
             np.random.seed(seed)
 
+        num_samples = self.get_parameter("num_samples")
+
         repair_key = self.get_parameter("repair_key")
         if repair_key is None:
             repair_key = BuildingUtil.DEFAULT_REPAIR_KEY
@@ -84,7 +86,6 @@ def recovery_rate(self, buildings, sample_damage_states):
             # Obtain the damage states
             mapped_repair = repair_sets_by_guid[row['guid']]
             samples_mcs = row['sample_damage_states'].split(",")
-            num_samples = len(samples_mcs)
 
             # Use a lambda to obtain the damage state in numeric form. Note that since damage states are single digits,
             # it suffices to look at the last character and convert into an integer value. Do this computation once
@@ -92,7 +93,7 @@ def recovery_rate(self, buildings, sample_damage_states):
             samples_mcs_ds = list(map(lambda x: int(x[-1]), samples_mcs))
 
             # Now, perform the two nested loops, using the indexing function to simplify the syntax.
-            for i in range(0, num_samples):
+            for i in range(0, len(samples_mcs)):
                 state = samples_mcs_ds[i]
 
                 percent_func = np.random.random(num_samples)
@@ -101,6 +102,7 @@ def recovery_rate(self, buildings, sample_damage_states):
                 repair_time = mapped_repair.repair_curves[state].solve_curve_for_inverse(
                     hazard_values={}, curve_parameters=mapped_repair.curve_parameters, **{"repair_time": percent_func}
                 ) / 7
+                print(repair_time)
 
         return repair_time
 
@@ -132,7 +134,13 @@ def get_spec(self):
                     'required': False,
                     'description': 'Initial seed for the probabilistic model',
                     'type': int
-                }
+                },
+                {
+                    'id': 'num_samples',
+                    'required': True,
+                    'description': 'Number of sample scenarios',
+                    'type': int
+                },
             ],
             'input_datasets': [
                 {
diff --git a/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py b/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py
new file mode 100644
index 000000000..1ca918ef6
--- /dev/null
+++ b/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py
@@ -0,0 +1,41 @@
+# This program and the accompanying materials are made available under the
+# terms of the Mozilla Public License v2.0 which accompanies this distribution,
+# and is available at https://www.mozilla.org/en-US/MPL/2.0/
+
+from pyincore import IncoreClient, RepairService, MappingSet
+from pyincore.analyses.buildingrepair.buildingrepair import BuildingRepair
+import pyincore.globals as pyglobals
+
+
+def run_with_base_class():
+    client = IncoreClient(pyglobals.INCORE_API_DEV_URL)
+
+    # Joplin
+    buildings = "5df7d0de425e0b00092d0082"  # joplin ergo:buildingInventoryVer6 28k buildings
+
+    # sample_damage_states = "6112d9ccca3e973ce144b4d9"  # 500 samples 28k buildings - MCS output format
+    sample_damage_states = "60f883c059a8cc52bab4dd77"  # 10 samples 28k buildings - MCS output format
+    result_name = "joplin_repair_time"
+
+    seed = 1238
+
+    building_repair = BuildingRepair(client)
+    building_repair.load_remote_input_dataset("buildings", buildings)
+
+    mapping_id = "60edfa3efc0f3a7af53a21b5"
+    repair_service = RepairService(client)
+    mapping_set = MappingSet(repair_service.get_mapping(mapping_id))
+    building_repair.set_input_dataset('dfr3_mapping_set', mapping_set)
+
+    building_repair.load_remote_input_dataset("sample_damage_states", sample_damage_states)
+
+    building_repair.set_parameter("result_name", result_name)
+    building_repair.set_parameter("seed", seed)
+
+    building_repair.run_analysis()
+
+    return True
+
+
+if __name__ == '__main__':
+    run_with_base_class()

From 5daebe04563d8ffaef3d9ef7c59d6eeb8c2288c7 Mon Sep 17 00:00:00 2001
From: Chen Wang <cwang138@illinois.edu>
Date: Mon, 20 May 2024 17:10:57 -0500
Subject: [PATCH 3/4] separate out the recovery time part

---
 .../analyses/buildingrepair/buildingrepair.py | 64 ++++++++++++-------
 .../buildingrepair/test_buildingrepair.py     |  7 +-
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/pyincore/analyses/buildingrepair/buildingrepair.py b/pyincore/analyses/buildingrepair/buildingrepair.py
index 92fee42db..a25209c9f 100644
--- a/pyincore/analyses/buildingrepair/buildingrepair.py
+++ b/pyincore/analyses/buildingrepair/buildingrepair.py
@@ -50,8 +50,8 @@ def run(self):
         sample_damage_states = self.get_input_dataset("sample_damage_states").get_dataframe_from_csv(low_memory=False)
 
         # Returns dataframe
-        repair_results = self.recovery_rate(buildings, sample_damage_states)
-        self.set_result_csv_data("repair_time", repair_results, result_name, "dataframe")
+        recovery = self.recovery_rate(buildings, sample_damage_states)
+        self.set_result_csv_data("recovery", recovery, result_name + "_recovery", "dataframe")
 
         return True
 
@@ -69,7 +69,15 @@ def recovery_rate(self, buildings, sample_damage_states):
         if seed is not None:
             np.random.seed(seed)
 
-        num_samples = self.get_parameter("num_samples")
+        num_samples = len(sample_damage_states["sample_damage_states"].iloc[0].split(","))
+
+        # Generate a long numpy matrix for combined N1, N2 samples
+        num_buildings = sample_damage_states.shape[0]
+        samples_n1_n2 = np.zeros((num_buildings, num_samples * num_samples))
+
+        # Now, we define an internal function that maps a sample pair (x, y) to its flat column index
+        def idx(x, y):
+            return x * num_samples + y
 
         repair_key = self.get_parameter("repair_key")
         if repair_key is None:
@@ -78,14 +86,17 @@ def recovery_rate(self, buildings, sample_damage_states):
         repair_sets = self.repairsvc.match_inventory(self.get_input_dataset("dfr3_mapping_set"), buildings, repair_key)
         repair_sets_by_guid = {}  # get repair sets by guid so they can be mapped with output of monte carlo
 
-        # This is sort of a workaround until we define Repair Curve models and abstract this out there
         for i, b in enumerate(buildings):
-            repair_sets_by_guid[b["properties"]['guid']] = repair_sets[str(i)]
+            # if building id has a matched repair curve set
+            if b['id'] in repair_sets.keys():
+                repair_sets_by_guid[b["properties"]['guid']] = repair_sets[b['id']]
+            else:
+                repair_sets_by_guid[b["properties"]['guid']] = None
 
-        for index, row in sample_damage_states.iterrows():
+        for build in range(0, num_buildings):
             # Obtain the damage states
-            mapped_repair = repair_sets_by_guid[row['guid']]
-            samples_mcs = row['sample_damage_states'].split(",")
+            mapped_repair = repair_sets_by_guid[sample_damage_states["guid"].iloc[build]]
+            samples_mcs = sample_damage_states["sample_damage_states"].iloc[build].split(",")
 
             # Use a lambda to obtain the damage state in numeric form. Note that since damage states are single digits,
             # it suffices to look at the last character and convert into an integer value. Do this computation once
@@ -93,18 +104,29 @@ def recovery_rate(self, buildings, sample_damage_states):
             samples_mcs_ds = list(map(lambda x: int(x[-1]), samples_mcs))
 
             # Now, perform the two nested loops, using the indexing function to simplify the syntax.
-            for i in range(0, len(samples_mcs)):
+            for i in range(0, num_samples):
                 state = samples_mcs_ds[i]
 
                 percent_func = np.random.random(num_samples)
                 # NOTE: Even though the kwarg name is "repair_time", it actually takes percent of functionality. DFR3
                 # system currently doesn't have a way to represent the name correctly when calculating the inverse.
-                repair_time = mapped_repair.repair_curves[state].solve_curve_for_inverse(
-                    hazard_values={}, curve_parameters=mapped_repair.curve_parameters, **{"repair_time": percent_func}
-                ) / 7
-                print(repair_time)
+                if mapped_repair is not None:
+                    repair_time = mapped_repair.repair_curves[state].solve_curve_for_inverse(
+                        hazard_values={}, curve_parameters=mapped_repair.curve_parameters,
+                        **{"repair_time": percent_func}
+                    ) / 7
+                else:
+                    repair_time = np.full(num_samples, np.nan)
+
+                for j in range(0, num_samples):
+                    samples_n1_n2[build, idx(i, j)] = round(repair_time[j], 1)
 
-        return repair_time
+        # Now, generate all the labels using list comprehension outside the loops
+        colnames = [f'sample_{i}_{j}' for i in range(0, num_samples) for j in range(0, num_samples)]
+        recovery_time = pd.DataFrame(samples_n1_n2, columns=colnames)
+        recovery_time.insert(0, 'guid', sample_damage_states["guid"])
+
+        return recovery_time
 
     def get_spec(self):
         """Get specifications of the residential building recovery analysis.
@@ -134,13 +156,7 @@ def get_spec(self):
                     'required': False,
                     'description': 'Initial seed for the probabilistic model',
                     'type': int
-                },
-                {
-                    'id': 'num_samples',
-                    'required': True,
-                    'description': 'Number of sample scenarios',
-                    'type': int
-                },
+                }
             ],
             'input_datasets': [
                 {
@@ -165,9 +181,9 @@ def get_spec(self):
             ],
             'output_datasets': [
                 {
-                    'id': 'repair_time',
-                    'description': 'CSV file of building repair times',
-                    'type': 'incore:buildingRepairTime'
+                    'id': 'recovery',
+                    'description': 'CSV file of commercial building recovery time',
+                    'type': 'incore:buildingRecoveryTime'
                 }
             ]
         }
diff --git a/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py b/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py
index 1ca918ef6..457565a64 100644
--- a/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py
+++ b/tests/pyincore/analyses/buildingrepair/test_buildingrepair.py
@@ -15,9 +15,6 @@ def run_with_base_class():
 
     # sample_damage_states = "6112d9ccca3e973ce144b4d9"  # 500 samples 28k buildings - MCS output format
     sample_damage_states = "60f883c059a8cc52bab4dd77"  # 10 samples 28k buildings - MCS output format
-    result_name = "joplin_repair_time"
-
-    seed = 1238
 
     building_repair = BuildingRepair(client)
     building_repair.load_remote_input_dataset("buildings", buildings)
@@ -29,8 +26,8 @@ def run_with_base_class():
 
     building_repair.load_remote_input_dataset("sample_damage_states", sample_damage_states)
 
-    building_repair.set_parameter("result_name", result_name)
-    building_repair.set_parameter("seed", seed)
+    building_repair.set_parameter("result_name", "joplin_repair_time")
+    building_repair.set_parameter("seed", 1238)
 
     building_repair.run_analysis()
 

From 5d86e3c8ac410f46af9f613e6d16af52b9438bc7 Mon Sep 17 00:00:00 2001
From: Chen Wang <cwang138@illinois.edu>
Date: Mon, 20 May 2024 17:12:35 -0500
Subject: [PATCH 4/4] changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c2f84f562..08d20a660 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
+## [Unreleased]
+
+### Added
+- Repair analysis that calculates the recovery time matrix [#567](https://github.com/IN-CORE/pyincore/issues/567)
+
 
 ## [1.18.1] - 2024-04-30