Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 45m no_output_timeout, include long line breaks in base #85

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
path: htmlcov
- run:
name: 'Integration Tests'
no_output_timeout: 45m
command: |
source dev_env.sh
mkdir /tmp/${CIRCLE_PROJECT_REPONAME}
Expand Down
57 changes: 39 additions & 18 deletions tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class GoogleAnalyticsBaseTest(BaseCase):
START_DATE_FORMAT = "%Y-%m-%dT00:00:00Z"
REPLICATION_KEY_FORMAT = "%Y-%m-%dT00:00:00.000000Z"

start_date = ""
start_date = dt.strftime(dt.utcnow() - timedelta(days=30), START_DATE_FORMAT)

@staticmethod
def tap_name():
Expand All @@ -49,9 +49,11 @@ def get_properties(self, original: bool = True):
"""Configuration properties required for the tap."""
return_value = {
# start_date hard coded to prevent data from aging out, new data stopped Oct 5, 2023
'start_date' : "2023-09-01T00:00:00Z",
#'start_date' : "2023-09-01T00:00:00Z", # TODO remove if new data allows dynamic date
'start_date' : self.start_date,
'view_id': os.getenv('TAP_GOOGLE_ANALYTICS_VIEW_ID'),
'report_definitions': [{"id": "a665732c-d18b-445c-89b2-5ca8928a7305", "name": "Test Report 1"}]
'report_definitions': [{"id": "a665732c-d18b-445c-89b2-5ca8928a7305",
"name": "Test Report 1"}]
}
if original:
return return_value
Expand Down Expand Up @@ -205,11 +207,13 @@ def run_and_verify_check_mode(self, conn_id):
menagerie.verify_check_exit_status(self, exit_status, check_job_name)

found_catalogs = menagerie.get_catalogs(conn_id)
self.assertGreater(len(found_catalogs), 0, msg="unable to locate schemas for connection {}".format(conn_id))
self.assertGreater(len(found_catalogs), 0,
msg="unable to locate schemas for connection {}".format(conn_id))

found_catalog_names = set(map(lambda c: c['stream_name'], found_catalogs))

self.assertSetEqual(self.expected_sync_streams(), found_catalog_names, msg="discovered schemas do not match")
self.assertSetEqual(self.expected_sync_streams(), found_catalog_names,
msg="discovered schemas do not match")
LOGGER.info("discovered schemas are OK")

return found_catalogs
Expand Down Expand Up @@ -291,18 +295,25 @@ def perform_and_verify_table_and_field_selection(self, conn_id, test_catalogs,

# collect field selection expectations
expected_automatic_fields = self.expected_automatic_fields()[cat['stream_name']]
selected_default_fields = expected_default_fields[cat['stream_name']] if select_default_fields else set()
selected_pagination_fields = expected_pagination_fields[cat['stream_name']] if select_pagination_fields else set()
selected_default_fields = expected_default_fields[cat['stream_name']] \
if select_default_fields else set()
selected_pagination_fields = expected_pagination_fields[cat['stream_name']] \
if select_pagination_fields else set()

# Verify all intended fields within the stream are selected
if non_selected_props:
expected_selected_fields = self.get_all_fields(catalog_entry) - non_selected_props.get(cat['stream_name'],set())
expected_selected_fields = self.get_all_fields(catalog_entry) \
- non_selected_props.get(cat['stream_name'],set())
else:
expected_selected_fields = expected_automatic_fields | selected_default_fields | selected_pagination_fields
expected_selected_fields = expected_automatic_fields | selected_default_fields | \
selected_pagination_fields
selected_fields = self._get_selected_fields_from_metadata(catalog_entry['metadata'])
for field in expected_selected_fields:
field_selected = field in selected_fields
LOGGER.info("\tValidating field selection on %s.%s: %s", cat['stream_name'], field, field_selected)
LOGGER.info("\tValidating field selection on %s.%s: %s",
cat['stream_name'],
field,
field_selected)

self.assertSetEqual(expected_selected_fields, selected_fields)

Expand All @@ -319,7 +330,12 @@ def _get_selected_fields_from_metadata(metadata):
selected_fields.add(field['breadcrumb'][1])
return selected_fields

def _select_streams_and_fields(self, conn_id, catalogs, select_default_fields, select_pagination_fields, non_selected_props=dict()):
def _select_streams_and_fields(self,
conn_id,
catalogs,
select_default_fields,
select_pagination_fields,
non_selected_props=dict()):
"""Select all streams and all fields within streams"""

for catalog in catalogs:
Expand Down Expand Up @@ -350,7 +366,7 @@ def _select_streams_and_fields(self, conn_id, catalogs, select_default_fields, s
@staticmethod
def parse_date(date_value):
"""
Pass in string-formatted-datetime, parse the value, and return it as an unformatted datetime object.
Pass in string-formatted-datetime, parse, and return as an unformatted datetime object.
"""
date_formats = {
"%Y-%m-%dT%H:%M:%S.%fZ",
Expand All @@ -366,7 +382,8 @@ def parse_date(date_value):
except ValueError:
continue

raise NotImplementedError("Tests do not account for dates of this format: {}".format(date_value))
raise NotImplementedError("Tests do not account for dates of this format: {}".format(
date_value))

def timedelta_formatted(self, dtime, days=0):
try:
Expand All @@ -383,7 +400,8 @@ def timedelta_formatted(self, dtime, days=0):
return dt.strftime(return_date, self.REPLICATION_KEY_FORMAT)

except ValueError:
return Exception("Datetime object is not of the format: {}".format(self.START_DATE_FORMAT))
return Exception("Datetime object is not of the format: {}".format(
self.START_DATE_FORMAT))

##########################################################################
### Tap Specific Methods
Expand Down Expand Up @@ -411,7 +429,9 @@ def expected_default_fields():
},
"Audience Overview": {
"ga:users", "ga:newUsers", "ga:sessions", "ga:sessionsPerUser", "ga:pageviews",
"ga:pageviewsPerSession", "ga:avgSessionDuration", "ga:bounceRate", "ga:date",'ga:month','ga:operatingSystem','ga:language','ga:hour','ga:browser','ga:year','ga:country','ga:city'
"ga:pageviewsPerSession", "ga:avgSessionDuration", "ga:bounceRate", "ga:date",
'ga:month','ga:operatingSystem','ga:language','ga:hour','ga:browser','ga:year',
'ga:country','ga:city'
},
"Audience Geo Location": {
"ga:users", "ga:newUsers", "ga:sessions", "ga:pageviewsPerSession",
Expand All @@ -420,12 +440,13 @@ def expected_default_fields():
},
"Audience Technology": {
"ga:users", "ga:newUsers", "ga:sessions", "ga:pageviewsPerSession",
"ga:avgSessionDuration", "ga:bounceRate", "ga:date", "ga:browser", "ga:operatingSystem"
"ga:avgSessionDuration", "ga:bounceRate", "ga:date", "ga:browser",
"ga:operatingSystem"
},
"Acquisition Overview": {
"ga:sessions", "ga:pageviewsPerSession", "ga:avgSessionDuration", "ga:bounceRate",
"ga:acquisitionTrafficChannel", "ga:acquisitionSource", "ga:acquisitionSourceMedium",
"ga:acquisitionMedium"
"ga:acquisitionTrafficChannel", "ga:acquisitionSource",
"ga:acquisitionSourceMedium", "ga:acquisitionMedium"
},
"Behavior Overview": {
"ga:pageviews", "ga:uniquePageviews", "ga:avgTimeOnPage", "ga:bounceRate",
Expand Down