Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interdependent recovery of residential buildings and households #606

1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
## [Unreleased]

### Changed
- Support Interdependent recovery of residential buildings and households [#606](https://github.com/IN-CORE/pyincore/pull/606)
- Remove unused insecure IN-CORE client [#581](https://github.com/IN-CORE/pyincore/issues/581)

### Added
Expand Down
232 changes: 186 additions & 46 deletions pyincore/analyses/housingrecoverysequential/housingrecoverysequential.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class HousingRecoverySequential(BaseAnalysis):
dislocation dataset, a transition probability matrix (TPM) and an initial state vector.

The computation operates by segregating household units into five zones as a way of
assigning social vulnerability. Using this vulnerability in conjunction with the TPM
assigning social vulnerability or household income. Using this vulnerability in conjunction with the TPM
and the initial state vector, a Markov chain computation simulates most probable
states to generate a stage history of housing recovery changes for each household.

Expand All @@ -35,8 +35,8 @@ class HousingRecoverySequential(BaseAnalysis):
incore_client (IncoreClient): Service authentication.
"""

# Social vulnerability value generators per zone
__sv_generator = {
# threshold and definition per zone based on social vulnerability analysis
__zone_def_sv = {
"Z1": {
"threshold_0": 0.95,
"below_lower": 0.00,
Expand Down Expand Up @@ -83,6 +83,45 @@ class HousingRecoverySequential(BaseAnalysis):
},
}

# threshold and definition per zone based on household income
__zone_def_hhinc = {
"Z1": {
"threshold": 0.95,
"below_lower": 0.01,
"below_upper": 0.15,
"above_lower": 0.10,
"above_upper": 0.90,
},
"Z2": {
"threshold": 0.85,
"below_lower": 0.10,
"below_upper": 0.50,
"above_lower": 0.10,
"above_upper": 0.90,
},
"Z3": {
"threshold": 0.80,
"below_lower": 0.30,
"below_upper": 0.70,
"above_lower": 0.10,
"above_upper": 0.90,
},
"Z4": {
"threshold": 0.85,
"below_lower": 0.50,
"below_upper": 0.90,
"above_lower": 0.10,
"above_upper": 0.90,
},
"Z5": {
"threshold": 0.95,
"below_lower": 0.85,
"below_upper": 0.99,
"above_lower": 0.10,
"above_upper": 0.90,
},
}

def __init__(self, incore_client):
super(HousingRecoverySequential, self).__init__(incore_client)

Expand All @@ -93,6 +132,13 @@ def run(self):
t_final = self.get_parameter("t_final")

# Load population block data from IN-CORE
households_csv = self.get_input_dataset(
"population_dislocation_block"
).get_csv_reader()

# Read the header from households_csv
header = next(households_csv)

pop_dis_selectors = [
"guid",
"huid",
Expand All @@ -102,9 +148,11 @@ def run(self):
"ownershp",
"dislocated",
]
households_csv = self.get_input_dataset(
"population_dislocation_block"
).get_csv_reader()

# Check if 'hhinc' is in the header
if "hhinc" in header:
pop_dis_selectors.append("hhinc")

households_df = (pd.DataFrame(households_csv))[pop_dis_selectors]

# Perform conversions across the dataset from object type into the appropriate type
Expand All @@ -114,6 +162,10 @@ def run(self):
households_df["ownershp"] = pd.to_numeric(households_df["ownershp"])
households_df["dislocated"] = households_df["dislocated"] == "True"

# Check if 'hhinc' is in the header
if "hhinc" in header:
households_df["hhinc"] = pd.to_numeric(households_df["hhinc"])

# Load the transition probability matrix from IN-CORE
tpm_csv = self.get_input_dataset("tpm").get_csv_reader()
tpm = pd.DataFrame(list(tpm_csv))
Expand Down Expand Up @@ -214,16 +266,24 @@ def housing_serial_recovery_model(
"""
seed = self.get_parameter("seed")
rng = np.random.RandomState(seed)
sv_result = self.get_input_dataset("sv_result").get_dataframe_from_csv(
low_memory=False
)
# turn fips code to string for ease of matching
sv_result["FIPS"] = sv_result["FIPS"].astype(str)

# Compute the social vulnerability zone using known factors
households_df = self.compute_social_vulnerability_zones(
sv_result, households_df
)
sv_result = self.get_input_dataset("sv_result")
are_zones_from_sv = False

if sv_result is not None:
are_zones_from_sv = True
sv_result = sv_result.get_dataframe_from_csv(low_memory=False)

# turn fips code to string for ease of matching
sv_result["FIPS"] = sv_result["FIPS"].astype(str)

# Compute the social vulnerability zone using known factors
households_df = self.compute_social_vulnerability_zones(
sv_result, households_df
)
else:
# If sv result is Null, calculate the zones by using household income (hhinc)
households_df = self.compute_zones_by_household_income(households_df)

# Set the number of Markov chain stages
stages = int(t_final / t_delta)
Expand All @@ -237,7 +297,7 @@ def housing_serial_recovery_model(
# Obtain a social vulnerability score stochastically per household
# We use them later to construct the final output dataset
sv_scores = self.compute_social_vulnerability_values(
households_df, num_households, rng
households_df, num_households, rng, are_zones_from_sv
)

# We store Markov states as a list of numpy arrays for convenience and add each one by one
Expand Down Expand Up @@ -352,6 +412,7 @@ def housing_serial_recovery_model(
result["guid"] = households_df["guid"]
result["huid"] = households_df["huid"]
result["Zone"] = households_df["Zone"]
result["zone_indic"] = households_df["zone_indic"]
result["SV"] = sv_scores
column_names = [str(i) for i in range(1, stages + 1)]

Expand Down Expand Up @@ -391,58 +452,124 @@ def compute_social_vulnerability_zones(sv_result, households_df):
# e.g.Medium Vulnerable (zone3) extract the number 3 to construct Z3
households_df["Zone"] = households_df["zone"].apply(lambda row: "Z" + row[-2])

# add an indicator showing the zones are from Social Vulnerability analysis
households_df["zone_indic"] = "SV"

return households_df[households_df["Zone"] != "missing"]

def compute_social_vulnerability_values(self, households_df, num_households, rng):
def compute_social_vulnerability_values(
self, households_df, num_households, rng, are_zones_from_sv
):
"""
Compute the social vulnerability score of a household depending on its zone
Args:
households_df (pd.DataFrame): Information about household zones.
num_households (int): Number of households.
rng (np.RandomState): Random state to draw pseudo-random numbers from.
are_zones_from_sv: Boolean indicating whether zones are from social vulnerability analysis.
Returns:
pd.Series: social vulnerability scores.
"""
# Social vulnerability zone generator: this generalizes the code in the first version
sv_scores = np.zeros(num_households)
zones = households_df["Zone"].to_numpy()

# zones from social vulnerability analusis
if are_zones_from_sv:
zone_def = self.get_input_dataset("zone_def_sv")
if zone_def is None:
zone_def = self.__zone_def_sv
else:
zone_def = zone_def.get_json_reader()

# zones caulculated by household income (hhinc)
else:
zone_def = self.get_input_dataset("zone_def_hhinc")
if zone_def is None:
zone_def = self.__zone_def_hhinc
else:
zone_def = zone_def.get_json_reader()

for household in range(0, num_households):
spin = rng.rand()
zone = zones[household]

if spin < self.__sv_generator[zone]["threshold_0"]:
sv_scores[household] = round(
rng.uniform(
self.__sv_generator[zone]["below_lower"],
self.__sv_generator[zone]["below_upper"],
),
3,
)

# for zone 2, 3, 4 there is additional middle range
elif (
"threshold_1" in self.__sv_generator[zone].keys()
and spin < self.__sv_generator[zone]["threshold_1"]
):
sv_scores[household] = round(
rng.uniform(
self.__sv_generator[zone]["middle_lower"],
self.__sv_generator[zone]["middle_upper"],
),
3,
)
if are_zones_from_sv:
if spin < zone_def[zone]["threshold_0"]:
sv_scores[household] = round(
rng.uniform(
zone_def[zone]["below_lower"],
zone_def[zone]["below_upper"],
),
3,
)

# for zone 2, 3, 4 there is additional middle range
elif (
"threshold_1" in zone_def[zone].keys()
and spin < zone_def[zone]["threshold_1"]
):
sv_scores[household] = round(
rng.uniform(
zone_def[zone]["middle_lower"],
zone_def[zone]["middle_upper"],
),
3,
)
else:
sv_scores[household] = round(
rng.uniform(
zone_def[zone]["above_lower"],
zone_def[zone]["above_upper"],
),
3,
)

else:
sv_scores[household] = round(
rng.uniform(
self.__sv_generator[zone]["above_lower"],
self.__sv_generator[zone]["above_upper"],
),
3,
)
if spin < zone_def[zone]["threshold"]:
sv_scores[household] = round(
rng.uniform(
zone_def[zone]["below_lower"], zone_def[zone]["below_upper"]
),
3,
)
else:
sv_scores[household] = round(
rng.uniform(
zone_def[zone]["above_lower"], zone_def[zone]["above_upper"]
),
3,
)

return sv_scores

@staticmethod
def compute_zones_by_household_income(households_df):
"""
Compute the zones based on household income. Updates the household dataset
by adding a new `Zone` column and removing values with missing Zone.

Args:
households_df (pd.DataFrame): Vector position of a household.

Returns:
households_df (pd.DataFrame): Vector position of a household with additional zones.

"""
zone_map = {0: "Z5", 1: "Z4", 2: "Z3", 3: "Z2", 4: "Z1", np.nan: "missing"}

households_df["Zone"] = "missing"
households_df.loc[households_df["hhinc"] == 5, "Zone"] = zone_map[4]
households_df.loc[households_df["hhinc"] == 4, "Zone"] = zone_map[3]
households_df.loc[households_df["hhinc"] == 3, "Zone"] = zone_map[2]
households_df.loc[households_df["hhinc"] == 2, "Zone"] = zone_map[1]
households_df.loc[households_df["hhinc"] == 1, "Zone"] = zone_map[0]

# add an indicator showing the zones are from Social Vulnerability analysis
households_df["zone_indic"] = "hhinc"

return households_df[households_df["Zone"] != "missing"]

@staticmethod
def compute_regressions(markov_stages, household, lower, upper):
"""
Expand Down Expand Up @@ -535,11 +662,24 @@ def get_spec(self):
},
{
"id": "sv_result",
"required": True,
"required": False,
"description": "A csv file with zones containing demographic factors"
"qualified by a social vulnerability score",
"type": ["incore:socialVulnerabilityScore"],
},
{
"id": "zone_def_sv",
"required": False,
"description": "A json file with thresholds and definitions per zone "
"based on social vulnerability analysis",
"type": ["incore:zoneDefinitionsSocialVulnerability"],
},
{
"id": "zone_def_hhinc",
"required": False,
"description": "A json file with thresholds and definitions per zone based on household income",
"type": ["incore:zoneDefinitionsHouseholdIncome"],
},
],
"output_datasets": [
{
Expand Down
Loading
Loading