Skip to content

Commit

Permalink
Added date filter
Browse files Browse the repository at this point in the history
  • Loading branch information
vloothuis committed Nov 26, 2023
1 parent cc9733c commit 70f4ee5
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 10 deletions.
Binary file modified public/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file modified src/framework/processing/py/dist/port-0.0.0-py3-none-any.whl
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
18 changes: 8 additions & 10 deletions src/framework/processing/py/port/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

# Named tuple with three fields: an id, a title and a data frame.
ExtractionResult = namedtuple(
    "ExtractionResult",
    ["id", "title", "data_frame"],
)

# Cut-off for the date filter: records whose start date is earlier than
# this moment are skipped when step counts are collected.
filter_start_date = datetime(year=2017, month=1, day=1)


class FileInZipNotFoundError(Exception):
"""Raised when a specific file is not found within the ZIP archive."""
Expand All @@ -25,19 +27,13 @@ class InvalidXMLError(Exception):
class HealthDataHandler(xml.sax.ContentHandler):
def __init__(self, callback):
self.callback = callback
self.in_step_count = False

def startElement(self, tag, attributes):
if tag == "Record" and attributes["type"] == "HKQuantityTypeIdentifierStepCount":
self.in_step_count = True
value = int(attributes["value"])
startDate = self.parse_naive_datetime(attributes["startDate"])
self.callback(value, startDate)
start_date = self.parse_naive_datetime(attributes["startDate"])
self.callback(value, start_date)

def endElement(self, tag):
if tag == "Record" and self.in_step_count:
self.in_step_count = False

def parse_naive_datetime(self, date_str):
dt = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S %z')
return dt.replace(tzinfo=None)
Expand All @@ -47,9 +43,11 @@ def __init__(self):
self.start_times = []
self.steps = []

def __call__(self, value, startDate):
def __call__(self, value, start_date):
if start_date < filter_start_date:
return
self.steps.append(value)
self.start_times.append(startDate)
self.start_times.append(start_date)

def to_dataframe(self):
    """Return the collected values as a two-column pandas DataFrame.

    The 'Start Time' column is built from ``self.start_times`` and the
    'Steps' column from ``self.steps``.
    """
    columns = {'Start Time': self.start_times, 'Steps': self.steps}
    return pd.DataFrame(columns)
Expand Down
Binary file not shown.
Binary file not shown.
14 changes: 14 additions & 0 deletions src/framework/processing/py/tests/script_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,20 @@ def test_aggregate_same_day():
assert df.iloc[0]['Date'] == '2023-06-25'
assert df.iloc[0]['Steps'] == 40


def test_filters_out_data_based_on_date():
    """Records starting before the script's ``filter_start_date`` are dropped.

    The fixture holds one record from 2016 and one from 2017-01-01; only
    the latter is expected to appear in the aggregated result.
    """
    # Dates use the zero-padded "YYYY-MM-DD HH:MM:SS +ZZZZ" form so the
    # fixture does not rely on strptime tolerating a single-digit day.
    xml_data = """
<HealthData locale="en_NL">
<Record type="HKQuantityTypeIdentifierStepCount" startDate="2016-06-24 23:10:45 +0100" value="2"/>
<Record type="HKQuantityTypeIdentifierStepCount" startDate="2017-01-01 23:10:45 +0100" value="3"/>
</HealthData>
"""
    df = aggregate_daily_steps(io.StringIO(xml_data))
    assert len(df) == 1
    assert df.iloc[0]['Date'] == '2017-01-01'
    assert df.iloc[0]['Steps'] == 3

def test_no_records():
xml_data = """
<HealthData locale="en_NL">
Expand Down

0 comments on commit 70f4ee5

Please sign in to comment.