diff --git a/README.md b/README.md
index 39a46c5..3273f13 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,26 @@ Request body:
 }
 ```
 
+#### Product Analytics Bulk Upload
+Exports Google Analytics data gathered from the IDVA flow to Google Drive as a Google Sheets object. The routine then builds pivot tables so the data is easy to read. By default, `POST /analytics` writes data for the previous day.
+
+The ID of the destination Google Drive folder is configured via `ANALYTICS_ROOT` in `settings`.
+
+Optionally, the caller can pass in a date range to upload. The data is collated into a single document, and the same pivot tables are written over the collated data.
+
+`POST /analytics`
+```
+Query parameters: None
+```
+Optional request body (collates the given date range):
+```JSON
+{
+  "startDate": "YYYY-MM-DD",
+  "endDate": "YYYY-MM-DD"
+}
+```
+`POST /analytics/list` logs the Google Analytics accounts visible to the service account (useful for verifying setup).
+
 ### Deploying to Cloud.gov during development
 
 All deployments require having the correct Cloud.gov credentials in place. If
diff --git a/gdrive/analytics_api.py b/gdrive/analytics_api.py
new file mode 100644
index 0000000..e58668e
--- /dev/null
+++ b/gdrive/analytics_api.py
@@ -0,0 +1,83 @@
+"""
+Google Analytics Rest API
+"""
+
+from datetime import datetime, timedelta
+import logging
+from typing import Optional
+
+import fastapi
+from pydantic import BaseModel
+from fastapi import responses
+from gdrive import analytics_client, idva_flow_analytics
+
+log = logging.getLogger(__name__)
+router = fastapi.APIRouter()
+
+
+class AnalyticsRequest(BaseModel):
+    startDate: str
+    endDate: str
+
+
+@router.post("/analytics")
+async def run_analytics_default(req: Optional[AnalyticsRequest] = None):
+    start = None
+    end = None
+    message = None
+    if req is None:
+        start = datetime.today() - timedelta(days=1)
+        message = "Analytics report for %s complete." % (datetime.date(start))
+    else:
+        try:
+            start = datetime.strptime(req.startDate, analytics_client.API_DATE_FORMAT)
+            end = datetime.strptime(req.endDate, analytics_client.API_DATE_FORMAT)
+            message = "Analytics report for %s - %s complete." % (
+                datetime.date(start),
+                datetime.date(end),
+            )
+        except ValueError as err:
+            # @suppress("py/stack-trace-exposure")
+            return responses.JSONResponse(
+                status_code=422,
+                content="Failed (invalid date parameters): %s" % (err),
+            )
+
+    run_analytics(start, end)
+    return responses.JSONResponse(
+        status_code=202,
+        content=message,
+    )
+
+
+@router.post("/analytics/list")
+async def list_accounts_request():
+    list_accounts()
+    return responses.JSONResponse(
+        status_code=202, content="List request is being processed."
+    )
+
+
+def run_analytics(start_date: datetime, end_date: datetime):
+    try:
+        idva_flow_analytics.create_report(start_date, end_date)
+    except Exception as e:
+        log.error(e)
+
+
+def list_accounts():
+    try:
+        list_response = analytics_client.list()
+        if list_response is not None:
+            log.info("-------------------------------")
+            for act in list_response.accounts:
+                log.info("Name:\t\t%s" % (act.name))
+                log.info("Display name:\t%s" % (act.display_name))
+            log.info("-------------------------------")
+        else:
+            log.warning(
+                "List response was none. Ensure credentials are set correctly"
+                + " and you have access to the cloud property."
+ ) + except Exception as e: + log.error(e.args) diff --git a/gdrive/analytics_client.py b/gdrive/analytics_client.py new file mode 100644 index 0000000..9accc06 --- /dev/null +++ b/gdrive/analytics_client.py @@ -0,0 +1,109 @@ +import datetime + +from google.oauth2 import service_account +from google.analytics.admin import AnalyticsAdminServiceClient +from google.analytics.data_v1beta import BetaAnalyticsDataClient +from google.analytics.data_v1beta.types import ( + DateRange, + Dimension, + Metric, + RunReportRequest, + RunReportResponse, +) + +import logging +import pandas as pd + +from gdrive import settings + +log = logging.getLogger(__name__) + +creds = service_account.Credentials.from_service_account_info(settings.CREDENTIALS) +API_DATE_FORMAT = "%Y-%m-%d" + +""" +Client for the Google Analytics (GA4) API + +This class contains functions relating to downloading analytics data +for the IDVA flow. +""" + + +def download( + property_id, target_date: datetime, end_date: datetime = None +) -> RunReportResponse: + """ + Access Google Analytics (GA4) api and download desired analytics report. + """ + if end_date is None: + end_date = target_date + + request = RunReportRequest( + property=f"properties/{property_id}", + limit="250", + # https://developers.google.com/analytics/devguides/reporting/data/v1/api-schema + dimensions=[ + Dimension(name="eventName"), + Dimension(name="firstUserCampaignName"), + Dimension(name="firstUserMedium"), + Dimension(name="firstUserSource"), + Dimension(name="isConversionEvent"), + Dimension(name="linkUrl"), + ], + metrics=[ + Metric(name="eventCount"), + Metric(name="sessions"), + Metric(name="totalUsers"), + Metric(name="eventCountPerUser"), + Metric(name="conversions"), + ], + date_ranges=[ + DateRange( + start_date=format_date_for_api(target_date), + end_date=format_date_for_api(end_date), + ) + ], + ) + + return BetaAnalyticsDataClient(credentials=creds).run_report(request) + + +def list(): + """ + List the available properties the user has access to. Can be run to + verify setup of the enviornment is correct. + """ + client = AnalyticsAdminServiceClient(credentials=creds) + return client.list_accounts() + + +def format_date_for_api(date: datetime): + """ + Formats datetime object for Google Analytics Api (GA4) input + """ + return date.strftime(API_DATE_FORMAT) + + +def create_df_from_analytics_response(response: RunReportResponse): + """ + Extracts values from Google Analytics API response and transforms + them into pandas DataFrame for ease of use. This enables the analytics + client to do any processing of the data desired, if something comes up in + the future we want to do but isnt supported in GA4. + """ + all_headers = [] + for _, header in enumerate(response.dimension_headers): + all_headers += [header.name] + for _, header in enumerate(response.metric_headers): + all_headers += [header.name] + + arr = [all_headers] + for _, row in enumerate(response.rows): + row_li = [] + for _, val in enumerate(row.dimension_values): + row_li += [val.value] + for _, val in enumerate(row.metric_values): + row_li += [val.value] + arr += [row_li] + + return pd.DataFrame(arr) diff --git a/gdrive/api.py b/gdrive/api.py index c481ca0..2e19aa8 100644 --- a/gdrive/api.py +++ b/gdrive/api.py @@ -12,13 +12,13 @@ from googleapiclient.http import HttpError from starlette.requests import Request -from . import client, settings +from . 
import drive_client, settings log = logging.getLogger(__name__) router = fastapi.APIRouter() -client.init() +drive_client.init() # Patch zip decodeExtra to ignore invalid extra data @@ -50,16 +50,18 @@ async def upload_file( stream = io.BytesIO(body) - parent = client.create_folder(id, settings.ROOT_DIRECTORY) + parent = drive_client.create_folder(id, settings.ROOT_DIRECTORY) if zip: with zipfile.ZipFile(stream) as archive: files = archive.filelist for file in files: image = io.BytesIO(archive.read(file)) - client.upload_basic(f"{filename}_{file.filename}", parent, image) + drive_client.upload_basic( + f"{filename}_{file.filename}", parent, image + ) else: - client.upload_basic(filename, parent, stream) + drive_client.upload_basic(filename, parent, stream) except HttpError as error: log.error(f"An error occurred: {error}") @@ -73,10 +75,10 @@ async def delete_file(filename, response: Response): """ try: - files = client.get_files(filename) + files = drive_client.get_files(filename) if files: for file in files: - client.delete_file(file["id"]) + drive_client.delete_file(file["id"]) else: response.status_code = status.HTTP_404_NOT_FOUND diff --git a/gdrive/client.py b/gdrive/drive_client.py similarity index 80% rename from gdrive/client.py rename to gdrive/drive_client.py index 75c3f53..d148e2b 100644 --- a/gdrive/client.py +++ b/gdrive/drive_client.py @@ -14,8 +14,8 @@ creds = service_account.Credentials.from_service_account_info( settings.CREDENTIALS, scopes=settings.SCOPES ) + service = build("drive", "v3", credentials=creds) -sheets_service = build("sheets", "v4", credentials=creds) def init(): @@ -62,6 +62,22 @@ def list(count: int = 10, shared: bool = True) -> None: log.info(f"No such key: {error} in {item}") +def create_empty_spreadsheet(filename: str, parent_id: str) -> str: + file_metadata = { + "name": filename, + "parents": [parent_id], + "mimeType": "application/vnd.google-apps.spreadsheet", + } + + file = ( + service.files() + .create(body=file_metadata, fields="id", supportsAllDrives=True) + .execute() + ) + + return file.get("id") + + def drives_list(): """ List available shared drives @@ -164,54 +180,3 @@ def delete_file(id: str) -> None: """ service.files().delete(fileId=id, supportsAllDrives=True).execute() - - -def upload_participant( - first, - last, - email, - responseId, - time, - date, - ethnicity, - race, - gender, - age, - income, - skin_tone, -): - """ - Append participant data to spreadsheet - """ - values = [ - [ - first, - last, - first + " " + last, - email, - responseId, - time, - date, - ethnicity, - race, - gender, - income, - skin_tone, - ] - ] - - body = {"values": values} - result = ( - sheets_service.spreadsheets() - .values() - .append( - spreadsheetId=settings.SHEETS_ID, - range="Sheet1!A1", - valueInputOption="RAW", - body=body, - ) - .execute() - ) - if "error" in result: - raise error.ExportError(result["error"]["message"]) - return result diff --git a/gdrive/export_api.py b/gdrive/export_api.py index 350d31c..f4fad7f 100644 --- a/gdrive/export_api.py +++ b/gdrive/export_api.py @@ -10,7 +10,7 @@ from pydantic import BaseModel from fastapi import BackgroundTasks, responses -from gdrive import export_client, client, settings, error +from gdrive import export_client, drive_client, settings, error log = logging.getLogger(__name__) @@ -23,8 +23,8 @@ async def upload_file(interactionId): export_bytes = io.BytesIO( export_client.codename(json.dumps(export_data, indent=2)).encode() ) - parent = client.create_folder(interactionId, settings.ROOT_DIRECTORY) - 
client.upload_basic("analytics.json", parent, export_bytes) + parent = drive_client.create_folder(interactionId, settings.ROOT_DIRECTORY) + drive_client.upload_basic("analytics.json", parent, export_bytes) class ParticipantModel(BaseModel): @@ -85,7 +85,7 @@ async def survey_upload_response_task(request): if request.participant: participant = request.participant - client.upload_participant( + drive_client.upload_participant( participant.first, participant.last, participant.email, diff --git a/gdrive/idva_flow_analytics.py b/gdrive/idva_flow_analytics.py new file mode 100644 index 0000000..db7e020 --- /dev/null +++ b/gdrive/idva_flow_analytics.py @@ -0,0 +1,492 @@ +import datetime +import pandas as pd +import logging + +from gdrive import settings, sheets_client, drive_client, analytics_client + +log = logging.getLogger(__name__) + + +def create_report(start_date: datetime, end_date: datetime): + response = analytics_client.download( + settings.ANALYTICS_PROPERTY_ID, start_date, end_date + ) + + analytics_df = analytics_client.create_df_from_analytics_response(response) + sheets_id = export(analytics_df, start_date, end_date) + create_pages_and_pivot_tables(analytics_df, sheets_id=sheets_id) + + +def export( + df: pd.DataFrame, date_of_report: datetime, end_date: datetime = None +) -> str: + """ + Transform the downloaded response from the google analytics API into a + Google Sheets Object. + + This function first touches a Google Sheets object with the drive API, then + writes the analytics data to that object. As of right now there is no way to do + this in one API transaction. + + Args: + df (pandas.DataFrame): Tabular data to export to Google Sheets object + date_of_report (datetime): Date the report was run + Returns: + str: Google Sheets ID of the new Sheets object + """ + filename_str = generate_filename(date_of_report, end_date) + analytics_folder_id = drive_client.create_folder( + "Google Analytics", parent_id=settings.ANALYTICS_ROOT + ) + + # We have to do this in multiple steps with more than one client because the Sheets API + # doesnt support opening a file in a given directory. + sheets_id = drive_client.create_empty_spreadsheet(filename_str, analytics_folder_id) + log.info("Uploading to folder %s (%s)" % ("Google Analytics", analytics_folder_id)) + result = sheets_client.export_df_to_gdrive_speadsheet(df, sheets_id) + log.info( + "Successfully created %s (%s)" % (filename_str, result.get("spreadsheetId")) + ) + return sheets_id + + +def create_pages_and_pivot_tables(df: pd.DataFrame, sheets_id: str): + """ + Add new pages and pivot tables. + + This function is fairly naive and inefficient. If we ever want to make Google Sheets + more often than once a day, we should refactor this to limit the number of API transactions. + + Args: + df (pandas.DataFrame): Tabular data in the spreadsheet + sheets_id (str): Google Sheets object ID + """ + + page1 = "Rekrewt Pivot Table - First Visit" + page2 = "Rekrewt Pivot Table - Sessions" + page3 = "GSA Use Pivot Table" + page4 = "Completions" + + new_sheet_name_to_id = sheets_client.add_new_pages( + [page1, page2, page3, page4], sheets_id + ) + log.info("Added %s pages to %s" % (len(new_sheet_name_to_id.keys()), sheets_id)) + create_pivot_tables( + df, (page1, page2, page3, page4), new_sheet_name_to_id, sheets_id + ) + + +def create_pivot_tables( + df: pd.DataFrame, page_names: (str, str, str), names_to_id: dict, sheets_id: str +): + # Make a dictionary mapping the name of the column to its index, useful for the pivot tables. 
+ col_dict = {} + for idx, val in enumerate(df.iloc[0]): + col_dict[val] = idx + + create_first_visit_pt(sheets_id, names_to_id[page_names[0]], col_dict) + log.info( + "Added 2 pivot tables to %s (%s)" % (page_names[0], names_to_id[page_names[0]]) + ) + + create_session_start_pt(sheets_id, names_to_id[page_names[1]], col_dict) + log.info( + "Added 2 pivot tables to %s (%s)" % (page_names[1], names_to_id[page_names[1]]) + ) + + create_clicks_pt(sheets_id, names_to_id[page_names[2]], col_dict) + log.info( + "Added pivot table to %s (%s)" % (page_names[2], names_to_id[page_names[2]]) + ) + + create_feedback_pt(sheets_id, names_to_id[page_names[3]], col_dict) + log.info( + "Added pivot table to %s (%s)" % (page_names[3], names_to_id[page_names[3]]) + ) + + sheets_client.update_cell_value( + sheets_id, page_names[0], "A17", "Total First Visits" + ) + sheets_client.update_cell_value( + sheets_id, + page_names[0], + "A18", + '=GETPIVOTDATA("SUM of eventCount",A1, "eventName", "first_visit") + GETPIVOTDATA("SUM of eventCount",F1, "eventName", "first_visit")', + ) + log.info("Wrote totals to %s" % (page_names[0])) + + sheets_client.update_cell_value(sheets_id, page_names[1], "A17", "Total Sessions") + sheets_client.update_cell_value( + sheets_id, + page_names[1], + "A18", + '=GETPIVOTDATA("SUM of eventCount",A1, "eventName", "session_start") + GETPIVOTDATA("SUM of eventCount",F1, "eventName", "session_start")', + ) + log.info("Wrote totals to %s" % (page_names[1])) + + +def create_first_visit_pt(sheets_id, page_id, col_dict): + first_visit_facebook_pt_def = { + "pivotTable": { + "source": { + # First Sheet (Sheet1) is always ID 0 + "sheetId": 0, + }, + "rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["firstUserSource"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "filterSpecs": [ + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "first_visit", + } + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["eventName"], + }, + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "facebook", + }, + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["firstUserSource"], + }, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + first_visit_rt_pt_def = { + "pivotTable": { + "source": { + # First Sheet (Sheet1) is always ID 0 + "sheetId": 0, + }, + "rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["firstUserSource"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "filterSpecs": [ + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "first_visit", + } + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["eventName"], + }, + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "rt", + }, + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["firstUserSource"], + 
}, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + + sheets_client.add_pivot_tables( + sheets_id, + page_id, + first_visit_facebook_pt_def, + ) + sheets_client.add_pivot_tables( + sheets_id, + page_id, + first_visit_rt_pt_def, + row_idx=0, + col_idx=5, + ) + + +def create_session_start_pt(sheets_id, page_id, col_dict): + # Add sessions pivot table, facebook + sessions_facebook_pt_def = { + "pivotTable": { + "source": { + # First Sheet (Sheet1) is always ID 0 + "sheetId": 0, + }, + "rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["firstUserSource"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "filterSpecs": [ + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "session_start", + } + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["eventName"], + }, + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "facebook", + }, + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["firstUserSource"], + }, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + sessions_rt_pt_def = { + "pivotTable": { + "source": { + # First Sheet (Sheet1) is always ID 0 + "sheetId": 0, + }, + "rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["firstUserSource"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "filterSpecs": [ + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "session_start", + } + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["eventName"], + }, + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "rt", + }, + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["firstUserSource"], + }, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + + sheets_client.add_pivot_tables(sheets_id, page_id, sessions_facebook_pt_def) + sheets_client.add_pivot_tables( + sheets_id, page_id, sessions_rt_pt_def, row_idx=0, col_idx=5 + ) + + +def create_clicks_pt(sheets_id, page_id, col_dict): + clicks_pt_def = { + "pivotTable": { + "source": { + # First Sheet (Sheet1) is always ID 0 + "sheetId": 0, + }, + "rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + + sheets_client.add_pivot_tables(sheets_id, page_id, clicks_pt_def) + + +def create_feedback_pt(sheets_id, page_id, col_dict): + feedback_pt_def = { + "pivotTable": { + "source": { + "sheetId": 0, + }, + 
"rows": [ + { + "sourceColumnOffset": col_dict["eventName"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + { + "sourceColumnOffset": col_dict["eventCount"], + "showTotals": True, + "sortOrder": "ASCENDING", + }, + ], + "filterSpecs": [ + { + "filterCriteria": { + "condition": { + "type": "TEXT_CONTAINS", + "values": [ + { + "userEnteredValue": "feedback", + } + ], + }, + "visibleByDefault": True, + }, + "columnOffsetIndex": col_dict["linkUrl"], + }, + ], + "values": [ + { + "summarizeFunction": "SUM", + "sourceColumnOffset": col_dict["eventCount"], + } + ], + "valueLayout": "HORIZONTAL", + } + } + + sheets_client.add_pivot_tables(sheets_id, page_id, feedback_pt_def) + + +def generate_filename(date: datetime, end_date: datetime = None): + """ + Return filename for the new spreadsheet to be saved as + + Args: + date (datetime): date to format + Return: + str: Formatted Date + """ + ret = date.strftime("%Y%m%d") + if end_date is not None and end_date != date: + ret += "-%s" % (end_date.strftime("%Y%m%d")) + return ret diff --git a/gdrive/main.py b/gdrive/main.py index 4a05883..185877c 100644 --- a/gdrive/main.py +++ b/gdrive/main.py @@ -6,7 +6,7 @@ import fastapi import starlette_prometheus -from . import api, export_api, settings +from . import api, export_api, settings, analytics_api logging.basicConfig(level=settings.LOG_LEVEL) @@ -17,3 +17,4 @@ app.include_router(api.router) app.include_router(export_api.router) +app.include_router(analytics_api.router) diff --git a/gdrive/settings.py b/gdrive/settings.py index b904d0a..4bae7bb 100644 --- a/gdrive/settings.py +++ b/gdrive/settings.py @@ -15,13 +15,17 @@ LOG_LEVEL = os.getenv("LOG_LEVEL", logging.getLevelName(logging.INFO)) SCOPES = [ + "https://www.googleapis.com/auth/analytics", "https://www.googleapis.com/auth/drive", "https://www.googleapis.com/auth/spreadsheets", ] + SERVICE_ACCOUNT_FILE = "credentials.json" ROOT_DIRECTORY = "" CODE_NAMES = None CREDENTIALS = None +ANALYTICS_ROOT = None +ANALYTICS_PROPERTY_ID = None ES_HOST = os.getenv("ES_HOST") ES_PORT = os.getenv("ES_PORT") @@ -44,6 +48,8 @@ log.info("Loading credentials from creds file") config = json.load(file) CREDENTIALS = config["credentials"] + ANALYTICS_ROOT = config["analytics_root"] + ANALYTICS_PROPERTY_ID = config["analytics_property_id"] ROOT_DIRECTORY = config["root_directory"] CODE_NAMES = config["code_names"] SHEETS_ID = config["sheets_id"] diff --git a/gdrive/sheets_client.py b/gdrive/sheets_client.py new file mode 100644 index 0000000..2c766a5 --- /dev/null +++ b/gdrive/sheets_client.py @@ -0,0 +1,231 @@ +import logging +import pandas as pd +from typing import List + +from google.oauth2 import service_account +from googleapiclient.discovery import build + +from gdrive import settings, error + +log = logging.getLogger(__name__) + +creds = service_account.Credentials.from_service_account_info( + settings.CREDENTIALS, scopes=settings.SCOPES +) + +sheets_service = build("sheets", "v4", credentials=creds) + +""" +At present, every function call in this library represents its own API +transaction. If a lot of operations were to be made at once, this would hinder speed +quite a bit. + +Some batching in the future if the use case for this library gets heavier is necessary. 
+""" + + +def update_cell_value( + sheet_id: str, page_name: str, range_str: str, value: str, vio="USER_ENTERED" +): + """ + Write the specifed value to specified range + + Args: + sheet_id (str): Google sheets object ID + page_name (str): page target to edit + range_str (str): range to write the values to + value (str): value to write to the specified location + vio (str): + default (str): "USER_ENTERED" User entered values get resolved by googles parsing function. + Functions, Integers and strings can all be entered this way. + + Returns: + Google API Raw Result + """ + body = { + "values": [ + # Cell values + [ + value, + ] + # Other values + ] + } + + result = ( + sheets_service.spreadsheets() + .values() + .update( + spreadsheetId=sheet_id, + range="%s!%s" % (page_name, range_str), + valueInputOption=vio, + body=body, + ) + .execute() + ) + + return result + + +def add_pivot_tables( + sheets_id: str, + target_page_id: str, + pt_def: object, + row_idx: int = 0, + col_idx: int = 0, +): + """ + Writes the pivot table definition to the specified location. + + Args: + sheets_id (str): ID for the sheets object + target_page_id (str): ID for the target page of the sheets object, (Sheet1 is always 0) + pt_def (object): JSON encoded dict + row_idx (int): Index of the row to write the start of the table + default: 0 + col_idx (int): Index of the column to write the start of the table + default: 0 + + Returns: + Google Sheets API Response: RAW response to the write operation + """ + requests = [ + { + "updateCells": { + "rows": { + # I would need to write a whole library to parameterize this well so + # Client Code will just need to pass the JSON definitions in. + "values": pt_def + }, + "start": { + "sheetId": target_page_id, + "rowIndex": row_idx, + "columnIndex": col_idx, + }, + "fields": "pivotTable", + } + } + ] + + body = {"requests": requests} + + response = ( + sheets_service.spreadsheets() + .batchUpdate(spreadsheetId=sheets_id, body=body) + .execute() + ) + + return response + + +def add_new_pages(page_names: [str], sheets_id: str): + new_sheets_reqs = [] + for label in page_names: + req = { + "addSheet": { + "properties": { + "title": label, + } + } + } + + new_sheets_reqs.append(req) + + body = {"requests": new_sheets_reqs} + + result = ( + sheets_service.spreadsheets() + .batchUpdate( + spreadsheetId=sheets_id, + body=body, + ) + .execute() + ) + + sheet_title_to_id = {} + for reply in result.get("replies"): + props = reply.get("addSheet").get("properties") + sheet_title_to_id[props.get("title")] = props.get("sheetId") + + return sheet_title_to_id + + +def export_df_to_gdrive_speadsheet(df: pd.DataFrame, sheets_id: str, title="Sheet1"): + """ + Exports an entire pandas dataframe to a Google Sheets Object. + + Args: + df (pandas.DataFrame): Tabular data to be exported to a spreadsheet + title (str): Title for the target spreadsheet to write the data to. 
+ default: "Sheet1" default value for new Google Sheets sheets object + + Returns: + Google Sheets API Response: RAW response to the write operation + """ + body = {"values": df.values.tolist()} + result = ( + sheets_service.spreadsheets() + .values() + .append( + spreadsheetId=sheets_id, + range="%s!A1" % (title), + valueInputOption="USER_ENTERED", + body=body, + ) + .execute() + ) + if "error" in result: + raise error.ExportError(result["error"]["message"]) + + return result + + +def upload_participant( + first, + last, + email, + responseId, + time, + date, + ethnicity, + race, + gender, + age, + income, + skin_tone, +): + """ + Append participant data to the rekrewt raw completions spreadsheet + """ + values = [ + [ + first, + last, + first + " " + last, + email, + responseId, + time, + date, + ethnicity, + race, + gender, + income, + skin_tone, + ] + ] + + body = {"values": values} + result = ( + sheets_service.spreadsheets() + .values() + .append( + spreadsheetId=settings.SHEETS_ID, + range="Sheet1!A1", + valueInputOption="RAW", + body=body, + ) + .execute() + ) + if "error" in result: + raise error.ExportError(result["error"]["message"]) + return result diff --git a/requirements.txt b/requirements.txt index cbc9ce9..10ae035 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,13 @@ fastapi==0.103.2 uvicorn==0.23.2 starlette-prometheus==0.9.0 +google-analytics-admin==0.20.0 +google-analytics-data==0.17.1 +google-api-core==2.11.1 google-api-python-client==2.102.0 +google-auth==2.23.0 google-auth-httplib2==0.1.1 google-auth-oauthlib==1.1.0 +googleapis-common-protos==1.60.0 opensearch-py==2.3.1 +pandas==2.1.1 diff --git a/tests/test_api.py b/tests/test_api.py index 3359e2c..a5223fc 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -7,7 +7,9 @@ from fastapi import testclient # pylint: disable=wrong-import-position -sys.modules["gdrive.client"] = MagicMock() +sys.modules["gdrive.drive_client"] = MagicMock() +sys.modules["gdrive.sheets_client"] = MagicMock() +sys.modules["gdrive.analytics_client"] = MagicMock() from gdrive import main client = testclient.TestClient(main.app)