Nancodes for claims_hosp:
* add missing columns, allow nan values
dshemetov committed Nov 9, 2021
1 parent 0b7103a commit e804677
Showing 4 changed files with 45 additions and 15 deletions.
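
The commit adds three columns (missing_val, missing_se, missing_sample_size) to every exported CSV, using the Nans codes from delphi_utils to record why a value is NA instead of leaving it implicit. A minimal sketch of one emitted row under the default path (standard error withheld for privacy), with a made-up geo id and value:

from delphi_utils import Nans  # same enum the indicator imports below

# One row as the updated write_to_csv emits it; geo_id and val are invented.
row = {
    "geo_id": "pa", "val": 1.234567, "se": "NA", "direction": "NA", "sample_size": "NA",
    "missing_val": Nans.NOT_MISSING.value,             # val is reported
    "missing_se": Nans.CENSORED.value,                 # se withheld for privacy
    "missing_sample_size": Nans.NOT_APPLICABLE.value,  # sample size is never reported here
}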
1 change: 0 additions & 1 deletion _delphi_utils_python/tests/test_export.py
@@ -9,7 +9,6 @@

from delphi_utils import create_export_csv, Nans


def _clean_directory(directory):
"""Clean files out of a directory."""
for fname in listdir(directory):
42 changes: 36 additions & 6 deletions claims_hosp/delphi_claims_hosp/update_indicator.py
@@ -13,7 +13,7 @@
# third party
import numpy as np
import pandas as pd
from delphi_utils import GeoMapper
from delphi_utils import GeoMapper, Nans

# first party
from delphi_utils import Weekday
@@ -235,7 +235,7 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
all_rates = output_dict["rates"]
all_se = output_dict["se"]
all_include = output_dict["include"]
out_n = 0
out_n, out_i = 0, 0
for i, date in enumerate(dates):
filename = "%s/%s_%s_%s.csv" % (
output_path,
@@ -244,7 +244,10 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
self.signal_name,
)
with open(filename, "w") as outfile:
outfile.write("geo_id,val,se,direction,sample_size\n")
outfile.write(
"geo_id,val,se,direction,sample_size," +
"missing_val,missing_se,missing_sample_size\n"
)
for geo_id in geo_ids:
val = all_rates[geo_id][i]
se = all_se[geo_id][i]
@@ -257,11 +260,38 @@ def write_to_csv(self, output_dict, output_path="./receiving"):
if self.write_se:
assert val > 0 and se > 0, "p=0, std_err=0 invalid"
outfile.write(
"%s,%f,%s,%s,%s\n" % (geo_id, val, se, "NA", "NA"))
"%s,%f,%s,%s,%s,%d,%d,%d\n" % (
geo_id, val, se, "NA", "NA",
Nans.NOT_MISSING.value,
Nans.NOT_MISSING.value,
Nans.NOT_APPLICABLE.value
)
)
else:
# for privacy reasons we will not report the standard error
outfile.write(
"%s,%f,%s,%s,%s\n" % (geo_id, val, "NA", "NA", "NA"))
"%s,%f,%s,%s,%s,%d,%d,%d\n" % (
geo_id, val, "NA", "NA", "NA",
Nans.NOT_MISSING.value,
Nans.CENSORED.value,
Nans.NOT_APPLICABLE.value
)
)
out_n += 1
else:
# Write nans out anyway for versioning
logging.warning("writing insufficient data for geo_id {0}, {1}".format(
geo_id, i
))
outfile.write(
"%s,%s,%s,%s,%s,%d,%d,%d\n" % (
geo_id, "NA", "NA", "NA", "NA",
Nans.CENSORED.value,
Nans.CENSORED.value,
Nans.NOT_APPLICABLE.value
)
)
out_i += 1

logging.debug("wrote %d rows for %d %s", out_n, len(geo_ids), geo_level)
logging.debug("wrote %d valued csvs for %d %s", out_n, len(geo_ids), geo_level)
logging.debug("wrote %d nan-valued csvs for %d %s", out_i, len(geo_ids), geo_level)
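
For downstream readers, the rows written only for versioning carry NA in val together with a CENSORED code in missing_val, so they can be filtered out cleanly. A minimal sketch of reading one exported file back, assuming a hypothetical path (the date, geography, and signal name in the filename are made up):

import pandas as pd
from delphi_utils import Nans

# Hypothetical exported file; actual names follow "%Y%m%d_<geo_level>_<signal_name>.csv".
df = pd.read_csv("receiving/20200502_state_claims_hosp.csv")

# Keep only rows whose value was actually reported; censored rows exist purely
# for versioning and have missing_val == Nans.CENSORED.value.
reported = df[df["missing_val"] == Nans.NOT_MISSING.value]
assert not reported["val"].isna().any()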
3 changes: 1 addition & 2 deletions claims_hosp/tests/test_indicator.py
@@ -57,13 +57,12 @@ def test_fit_fips(self):
date_range = pd.date_range("2020-05-01", "2020-05-20")
all_fips = self.fips_data.fips.unique()
loc_index_fips_data = self.fips_data.set_index(["fips", "timestamp"])
sample_fips = nr.choice(all_fips, 10)
sample_fips = all_fips[:50]

for fips in sample_fips:
sub_data = loc_index_fips_data.loc[fips]
sub_data = sub_data.reindex(date_range, fill_value=0)
res0 = ClaimsHospIndicator.fit(sub_data, date_range[0], fips)
# first value is burn-in
assert np.min(res0["rate"][1:]) > 0
assert np.max(res0["rate"][1:]) <= 100

14 changes: 8 additions & 6 deletions claims_hosp/tests/test_update_indicator.py
@@ -144,8 +144,9 @@ def test_write_to_csv_results(self):
expected_name = f"20200502_geography_{Config.signal_name}.csv"
assert exists(join(td.name, expected_name))
output_data = pd.read_csv(join(td.name, expected_name))
expected_columns = ["geo_id", "val", "se", "direction", "sample_size", "missing_val", "missing_se", "missing_sample_size"]
assert (
output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
output_data.columns == expected_columns
).all()
assert (output_data.geo_id == ["a", "b"]).all()
assert np.array_equal(output_data.val.values, np.array([0.1, 1]))
@@ -159,10 +160,10 @@ def test_write_to_csv_results(self):
assert exists(join(td.name, expected_name))
output_data = pd.read_csv(join(td.name, expected_name))
assert (
output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
output_data.columns == expected_columns
).all()
assert (output_data.geo_id == ["a"]).all()
assert np.array_equal(output_data.val.values, np.array([0.5]))
assert (output_data.geo_id == ["a", "b"]).all()
assert np.array_equal(output_data.val.values, np.array([0.5, np.nan]), equal_nan=True)
assert np.isnan(output_data.se.values).all()
assert np.isnan(output_data.direction.values).all()
assert np.isnan(output_data.sample_size.values).all()
@@ -171,7 +172,7 @@ def test_write_to_csv_results(self):
assert exists(join(td.name, expected_name))
output_data = pd.read_csv(join(td.name, expected_name))
assert (
output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
output_data.columns == expected_columns
).all()
assert (output_data.geo_id == ["a", "b"]).all()
assert np.array_equal(output_data.val.values, np.array([1.5, 3]))
@@ -224,8 +225,9 @@ def test_write_to_csv_with_se_results(self):
expected_name = f"20200502_geography_{signal_name}.csv"
assert exists(join(td.name, expected_name))
output_data = pd.read_csv(join(td.name, expected_name))
expected_columns = ["geo_id", "val", "se", "direction", "sample_size", "missing_val", "missing_se", "missing_sample_size"]
assert (
output_data.columns == ["geo_id", "val", "se", "direction", "sample_size"]
output_data.columns == expected_columns
).all()
assert (output_data.geo_id == ["a", "b"]).all()
assert np.array_equal(output_data.val.values, np.array([0.1, 1]))