From 2bd5f49414023075ea6e0861aaff64b2f571f8f6 Mon Sep 17 00:00:00 2001 From: ridhi96 Date: Thu, 16 Nov 2023 23:42:32 +0000 Subject: [PATCH] created all tables in BigQuery --- create_bq_tables.ipynb | 225 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 create_bq_tables.ipynb diff --git a/create_bq_tables.ipynb b/create_bq_tables.ipynb new file mode 100644 index 0000000..9c1f821 --- /dev/null +++ b/create_bq_tables.ipynb @@ -0,0 +1,225 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5e5fe419-6a99-4aa5-a4b4-c14f0c8038ec", + "metadata": {}, + "source": [ + "**Execute createTables.sh to create BigQuery tables of processed data**" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "24c29de1-76dd-4574-ac41-f702016a48ed", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/program_area_2020.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r437832d57d68de03_0000018bda760bc8_1 ... (4s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/program_area_time_rides_2018.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r5c5c7815a20d04bc_0000018bda762730_1 ... (2s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/program_area_time_rides_2019.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r7f7904794e836ed3_0000018bda763a3f_1 ... (4s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/program_area_time_rides_2021.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r70332df88fd21e9c_0000018bda76550c_1 ... (3s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/program_area_time_rides_2022.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r6b92ee689f011b8f_0000018bda766b9e_1 ... (5s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2018.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r396e8a956fe2afb_0000018bda768ad1_1 ... (14s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2019.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r4d4c4ae64b77786d_0000018bda76cebf_1 ... (14s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2020.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r580e35676ff43a38_0000018bda77125d_1 ... (22s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2021.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r730d35da7bd6d21d_0000018bda7774f4_1 ... (22s) Current status: DONE \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GCS Folder Path: gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2022.csv/*.csv\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r26148a34bbf177c9_0000018bda77d7a8_1 ... (22s) Current status: DONE \n" + ] + } + ], + "source": [ + "%%bash\n", + "\n", + "bash \"bdp-rideshare/createTables.sh\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "fb8ef954-1efa-43d9-aef7-e4186daf61fe", + "metadata": {}, + "source": [ + "**Create table for community areas stored in newline-delimited GeoJSON format**" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8d4c7418-8481-427c-8097-6e7dae4058d4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Waiting on bqjob_r49f1edd383a29152_0000018bda826616_1 ... (2s) Current status: DONE \n" + ] + } + ], + "source": [ + "!bq load \\\n", + " --source_format=NEWLINE_DELIMITED_JSON \\\n", + " --json_extension=GEOJSON \\\n", + " --autodetect \\\n", + " chicago_rideshare.community_areas \\\n", + " gs://msca-bdp-student-gcs/bdp-rideshare-project/neighborhoods/geojson/community_nl.geojsonl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b5d91e7-5a05-4e61-8931-f593a49ba159", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}