IN-CORE · longshuicy · Aug 28, 2024 · Jul 23, 2024 · Jul 24, 2024 · Jul 25, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 ## [Unreleased]
 
 ### Changed
+- Support Interdependent recovery of residential buildings and households [#606](https://github.com/IN-CORE/pyincore/pull/606)
 - Remove unused insecure IN-CORE client [#581](https://github.com/IN-CORE/pyincore/issues/581)
 
 ### Added

diff --git a/pyincore/analyses/housingrecoverysequential/housingrecoverysequential.py b/pyincore/analyses/housingrecoverysequential/housingrecoverysequential.py
@@ -17,7 +17,7 @@ class HousingRecoverySequential(BaseAnalysis):
     dislocation dataset, a transition probability matrix (TPM) and an initial state vector.
 
     The computation operates by segregating household units into five zones as a way of
-    assigning social vulnerability. Using this vulnerability in conjunction with the TPM
+    assigning social vulnerability or household income. Using this vulnerability in conjunction with the TPM
     and the initial state vector, a Markov chain computation simulates most probable
     states to generate a stage history of housing recovery changes for each household.
 
@@ -35,8 +35,8 @@ class HousingRecoverySequential(BaseAnalysis):
         incore_client (IncoreClient): Service authentication.
     """
 
-    # Social vulnerability value generators per zone
-    __sv_generator = {
+    # threshold and definition per zone based on social vulnerability analysis
+    __zone_def_sv = {
         "Z1": {
             "threshold_0": 0.95,
             "below_lower": 0.00,
@@ -83,6 +83,45 @@ class HousingRecoverySequential(BaseAnalysis):
         },
     }
 
+    # threshold and definition per zone based on household income
+    __zone_def_hhinc = {
+        "Z1": {
+            "threshold": 0.95,
+            "below_lower": 0.01,
+            "below_upper": 0.15,
+            "above_lower": 0.10,
+            "above_upper": 0.90,
+        },
+        "Z2": {
+            "threshold": 0.85,
+            "below_lower": 0.10,
+            "below_upper": 0.50,
+            "above_lower": 0.10,
+            "above_upper": 0.90,
+        },
+        "Z3": {
+            "threshold": 0.80,
+            "below_lower": 0.30,
+            "below_upper": 0.70,
+            "above_lower": 0.10,
+            "above_upper": 0.90,
+        },
+        "Z4": {
+            "threshold": 0.85,
+            "below_lower": 0.50,
+            "below_upper": 0.90,
+            "above_lower": 0.10,
+            "above_upper": 0.90,
+        },
+        "Z5": {
+            "threshold": 0.95,
+            "below_lower": 0.85,
+            "below_upper": 0.99,
+            "above_lower": 0.10,
+            "above_upper": 0.90,
+        },
+    }
+
     def __init__(self, incore_client):
         super(HousingRecoverySequential, self).__init__(incore_client)
 
@@ -93,6 +132,13 @@ def run(self):
         t_final = self.get_parameter("t_final")
 
         # Load population block data from IN-CORE
+        households_csv = self.get_input_dataset(
+            "population_dislocation_block"
+        ).get_csv_reader()
+
+        # Read the header from households_csv
+        header = next(households_csv)
+
         pop_dis_selectors = [
             "guid",
             "huid",
@@ -102,9 +148,11 @@ def run(self):
             "ownershp",
             "dislocated",
         ]
-        households_csv = self.get_input_dataset(
-            "population_dislocation_block"
-        ).get_csv_reader()
+
+        # Check if 'hhinc' is in the header
+        if "hhinc" in header:
+            pop_dis_selectors.append("hhinc")
+
         households_df = (pd.DataFrame(households_csv))[pop_dis_selectors]
 
         # Perform  conversions across the dataset from object type into the appropriate type
@@ -114,6 +162,10 @@ def run(self):
         households_df["ownershp"] = pd.to_numeric(households_df["ownershp"])
         households_df["dislocated"] = households_df["dislocated"] == "True"
 
+        # Check if 'hhinc' is in the header
+        if "hhinc" in header:
+            households_df["hhinc"] = pd.to_numeric(households_df["hhinc"])
+
         # Load the transition probability matrix from IN-CORE
         tpm_csv = self.get_input_dataset("tpm").get_csv_reader()
         tpm = pd.DataFrame(list(tpm_csv))
@@ -214,16 +266,24 @@ def housing_serial_recovery_model(
         """
         seed = self.get_parameter("seed")
         rng = np.random.RandomState(seed)
-        sv_result = self.get_input_dataset("sv_result").get_dataframe_from_csv(
-            low_memory=False
-        )
-        # turn fips code to string for ease of matching
-        sv_result["FIPS"] = sv_result["FIPS"].astype(str)
 
-        # Compute the social vulnerability zone using known factors
-        households_df = self.compute_social_vulnerability_zones(
-            sv_result, households_df
-        )
+        sv_result = self.get_input_dataset("sv_result")
+        are_zones_from_sv = False
+
+        if sv_result is not None:
+            are_zones_from_sv = True
+            sv_result = sv_result.get_dataframe_from_csv(low_memory=False)
+
+            # turn fips code to string for ease of matching
+            sv_result["FIPS"] = sv_result["FIPS"].astype(str)
+
+            # Compute the social vulnerability zone using known factors
+            households_df = self.compute_social_vulnerability_zones(
+                sv_result, households_df
+            )
+        else:
+            # If sv result is Null, calculate the zones by using household income (hhinc)
+            households_df = self.compute_zones_by_household_income(households_df)
 
         # Set the number of Markov chain stages
         stages = int(t_final / t_delta)
@@ -237,7 +297,7 @@ def housing_serial_recovery_model(
         # Obtain a social vulnerability score stochastically per household
         # We use them later to construct the final output dataset
         sv_scores = self.compute_social_vulnerability_values(
-            households_df, num_households, rng
+            households_df, num_households, rng, are_zones_from_sv
         )
 
         # We store Markov states as a list of numpy arrays for convenience and add each one by one
@@ -352,6 +412,7 @@ def housing_serial_recovery_model(
         result["guid"] = households_df["guid"]
         result["huid"] = households_df["huid"]
         result["Zone"] = households_df["Zone"]
+        result["zone_indic"] = households_df["zone_indic"]
         result["SV"] = sv_scores
         column_names = [str(i) for i in range(1, stages + 1)]
 
@@ -391,58 +452,124 @@ def compute_social_vulnerability_zones(sv_result, households_df):
         # e.g.Medium Vulnerable (zone3) extract the number 3 to construct Z3
         households_df["Zone"] = households_df["zone"].apply(lambda row: "Z" + row[-2])
 
+        # add an indicator showing the zones are from Social Vulnerability analysis
+        households_df["zone_indic"] = "SV"
+
         return households_df[households_df["Zone"] != "missing"]
 
-    def compute_social_vulnerability_values(self, households_df, num_households, rng):
+    def compute_social_vulnerability_values(
+        self, households_df, num_households, rng, are_zones_from_sv
+    ):
         """
         Compute the social vulnerability score of a household depending on its zone
         Args:
             households_df (pd.DataFrame): Information about household zones.
             num_households (int): Number of households.
             rng (np.RandomState): Random state to draw pseudo-random numbers from.
+            are_zones_from_sv: Boolean indicating whether zones are from social vulnerability analysis.
         Returns:
             pd.Series: social vulnerability scores.
         """
         # Social vulnerability zone generator: this generalizes the code in the first version
         sv_scores = np.zeros(num_households)
         zones = households_df["Zone"].to_numpy()
 
+        # zones from social vulnerability analusis
+        if are_zones_from_sv:
+            zone_def = self.get_input_dataset("zone_def_sv")
+            if zone_def is None:
+                zone_def = self.__zone_def_sv
+            else:
+                zone_def = zone_def.get_json_reader()
+
+        # zones caulculated by household income (hhinc)
+        else:
+            zone_def = self.get_input_dataset("zone_def_hhinc")
+            if zone_def is None:
+                zone_def = self.__zone_def_hhinc
+            else:
+                zone_def = zone_def.get_json_reader()
+
         for household in range(0, num_households):
             spin = rng.rand()
             zone = zones[household]
 
-            if spin < self.__sv_generator[zone]["threshold_0"]:
-                sv_scores[household] = round(
-                    rng.uniform(
-                        self.__sv_generator[zone]["below_lower"],
-                        self.__sv_generator[zone]["below_upper"],
-                    ),
-                    3,
-                )
-
-            # for zone 2, 3, 4 there is additional middle range
-            elif (
-                "threshold_1" in self.__sv_generator[zone].keys()
-                and spin < self.__sv_generator[zone]["threshold_1"]
-            ):
-                sv_scores[household] = round(
-                    rng.uniform(
-                        self.__sv_generator[zone]["middle_lower"],
-                        self.__sv_generator[zone]["middle_upper"],
-                    ),
-                    3,
-                )
+            if are_zones_from_sv:
+                if spin < zone_def[zone]["threshold_0"]:
+                    sv_scores[household] = round(
+                        rng.uniform(
+                            zone_def[zone]["below_lower"],
+                            zone_def[zone]["below_upper"],
+                        ),
+                        3,
+                    )
+
+                # for zone 2, 3, 4 there is additional middle range
+                elif (
+                    "threshold_1" in zone_def[zone].keys()
+                    and spin < zone_def[zone]["threshold_1"]
+                ):
+                    sv_scores[household] = round(
+                        rng.uniform(
+                            zone_def[zone]["middle_lower"],
+                            zone_def[zone]["middle_upper"],
+                        ),
+                        3,
+                    )
+                else:
+                    sv_scores[household] = round(
+                        rng.uniform(
+                            zone_def[zone]["above_lower"],
+                            zone_def[zone]["above_upper"],
+                        ),
+                        3,
+                    )
+
             else:
-                sv_scores[household] = round(
-                    rng.uniform(
-                        self.__sv_generator[zone]["above_lower"],
-                        self.__sv_generator[zone]["above_upper"],
-                    ),
-                    3,
-                )
+                if spin < zone_def[zone]["threshold"]:
+                    sv_scores[household] = round(
+                        rng.uniform(
+                            zone_def[zone]["below_lower"], zone_def[zone]["below_upper"]
+                        ),
+                        3,
+                    )
+                else:
+                    sv_scores[household] = round(
+                        rng.uniform(
+                            zone_def[zone]["above_lower"], zone_def[zone]["above_upper"]
+                        ),
+                        3,
+                    )
 
         return sv_scores
 
+    @staticmethod
+    def compute_zones_by_household_income(households_df):
+        """
+        Compute the zones based on household income. Updates the household dataset
+        by adding a new `Zone` column and removing values with missing Zone.
+
+        Args:
+            households_df (pd.DataFrame): Vector position of a household.
+
+        Returns:
+            households_df (pd.DataFrame): Vector position of a household with additional zones.
+
+        """
+        zone_map = {0: "Z5", 1: "Z4", 2: "Z3", 3: "Z2", 4: "Z1", np.nan: "missing"}
+
+        households_df["Zone"] = "missing"
+        households_df.loc[households_df["hhinc"] == 5, "Zone"] = zone_map[4]
+        households_df.loc[households_df["hhinc"] == 4, "Zone"] = zone_map[3]
+        households_df.loc[households_df["hhinc"] == 3, "Zone"] = zone_map[2]
+        households_df.loc[households_df["hhinc"] == 2, "Zone"] = zone_map[1]
+        households_df.loc[households_df["hhinc"] == 1, "Zone"] = zone_map[0]
+
+        # add an indicator showing the zones are from Social Vulnerability analysis
+        households_df["zone_indic"] = "hhinc"
+
+        return households_df[households_df["Zone"] != "missing"]
+
     @staticmethod
     def compute_regressions(markov_stages, household, lower, upper):
         """
@@ -535,11 +662,24 @@ def get_spec(self):
                 },
                 {
                     "id": "sv_result",
-                    "required": True,
+                    "required": False,
                     "description": "A csv file with zones containing demographic factors"
                     "qualified by a social vulnerability score",
                     "type": ["incore:socialVulnerabilityScore"],
                 },
+                {
+                    "id": "zone_def_sv",
+                    "required": False,
+                    "description": "A json file with thresholds and definitions per zone "
+                    "based on social vulnerability analysis",
+                    "type": ["incore:zoneDefinitionsSocialVulnerability"],
+                },
+                {
+                    "id": "zone_def_hhinc",
+                    "required": False,
+                    "description": "A json file with thresholds and definitions per zone based on household income",
+                    "type": ["incore:zoneDefinitionsHouseholdIncome"],
+                },
             ],
             "output_datasets": [
                 {