From 078cb1cbcca8fade6eb65412f58c010f3d190bc0 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 23 Nov 2023 05:19:15 +0000 Subject: [PATCH] added some articles and cited some methods --- unsupervised_ml.ipynb | 1880 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1872 insertions(+), 8 deletions(-) diff --git a/unsupervised_ml.ipynb b/unsupervised_ml.ipynb index 8118a2c..cf93e32 100644 --- a/unsupervised_ml.ipynb +++ b/unsupervised_ml.ipynb @@ -7,15 +7,126 @@ "source": [ "# Unsupervised ML\n", "\n", - "TEXT" + "This unsupervised component will be a \n", + "\n", + "Here's some Apache documentation of methods that could be useful:\n", + "\n", + "https://spark.apache.org/docs/latest/ml-clustering.html\n", + "https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.clustering.KMeans.html\n", + "\n", + "This article is a good start. It has three parts:\n", + "\n", + "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-one/\n", + "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-two/\n", + "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-three/\n", + "\n", + "\n", + "This article is fine, but some methods are extremely advanced, and we don't have the necessary packages installed:\n", + "\n", + "https://towardsdatascience.com/time-series-clustering-deriving-trends-and-archetypes-from-sequential-data-bb87783312b4" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "241961fd-69dd-4036-839c-d5ff609e034a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('spark.stage.maxConsecutiveAttempts', '10'),\n", + " ('spark.dynamicAllocation.minExecutors', '1'),\n", + " ('spark.eventLog.enabled', 'true'),\n", + " ('spark.submit.pyFiles',\n", + " 
'/root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,/root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,/root/.ivy2/jars/com.typesafe_config-1.4.2.jar,/root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,/root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,/root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,/root/.ivy2/jars/com.navigamez_greex-1.0.jar,/root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,/root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,/root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,/root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,/root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,/root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,/root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,/root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,/root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,/root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,/root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,/root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,/root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,/root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,/root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,/root/.ivy2/jars/com.go
ogle.api_gax-httpjson-0.105.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,/root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,/root/.ivy2/jars/com.google.api_gax-2.20.1.jar,/root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,/root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,/root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,/root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,/root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,/root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,/root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,/root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,/root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,/root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,/root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,/root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,/root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,/root/.ivy2/jar
s/io.grpc_grpc-netty-shaded-1.51.0.jar,/root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,/root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,/root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,/root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,/root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,/root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,/root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.dataproc.metrics.listener.metrics.collector.hostname',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", + " ('spark.dataproc.sql.joinConditionReorder.enabled', 'true'),\n", + " ('spark.kryoserializer.buffer.max', '2000M'),\n", + " ('spark.driver.port', '40739'),\n", + " ('spark.serializer', 'org.apache.spark.serializer.KryoSerializer'),\n", + " ('spark.dataproc.sql.local.rank.pushdown.enabled', 'true'),\n", + " ('spark.driver.host',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal'),\n", + " ('spark.driver.maxResultSize', '0'),\n", + " ('spark.yarn.unmanagedAM.enabled', 'true'),\n", + " ('spark.ui.filters',\n", + " 'org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter'),\n", + " ('spark.metrics.namespace',\n", + " 'app_name:${spark.app.name}.app_id:${spark.app.id}'),\n", + " ('spark.executor.memory', '4g'),\n", + " ('spark.dataproc.sql.optimizer.leftsemijoin.conversion.enabled', 'true'),\n", + " ('spark.app.startTime', '1700709314208'),\n", + " ('spark.hadoop.hive.execution.engine', 'mr'),\n", + " ('spark.executorEnv.PYTHONPATH',\n", + " 
'{{PWD}}/pyspark.zip{{PWD}}/py4j-0.10.9-src.zip{{PWD}}/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar{{PWD}}/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar{{PWD}}/com.typesafe_config-1.4.2.jar{{PWD}}/org.rocksdb_rocksdbjni-6.29.5.jar{{PWD}}/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar{{PWD}}/com.github.universal-automata_liblevenshtein-3.0.0.jar{{PWD}}/com.google.cloud_google-cloud-storage-2.16.0.jar{{PWD}}/com.navigamez_greex-1.0.jar{{PWD}}/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar{{PWD}}/it.unimi.dsi_fastutil-7.0.12.jar{{PWD}}/org.projectlombok_lombok-1.16.8.jar{{PWD}}/com.google.guava_guava-31.1-jre.jar{{PWD}}/com.google.guava_failureaccess-1.0.1.jar{{PWD}}/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar{{PWD}}/com.google.errorprone_error_prone_annotations-2.16.jar{{PWD}}/com.google.j2objc_j2objc-annotations-1.3.jar{{PWD}}/com.google.http-client_google-http-client-1.42.3.jar{{PWD}}/io.opencensus_opencensus-contrib-http-util-0.31.1.jar{{PWD}}/com.google.http-client_google-http-client-jackson2-1.42.3.jar{{PWD}}/com.google.http-client_google-http-client-gson-1.42.3.jar{{PWD}}/com.google.api-client_google-api-client-2.1.1.jar{{PWD}}/commons-codec_commons-codec-1.15.jar{{PWD}}/com.google.oauth-client_google-oauth-client-1.34.1.jar{{PWD}}/com.google.http-client_google-http-client-apache-v2-1.42.3.jar{{PWD}}/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar{{PWD}}/com.google.code.gson_gson-2.10.jar{{PWD}}/com.google.cloud_google-cloud-core-2.9.0.jar{{PWD}}/com.google.auto.value_auto-value-annotations-1.10.1.jar{{PWD}}/com.google.cloud_google-cloud-core-http-2.9.0.jar{{PWD}}/com.google.http-client_google-http-client-appengine-1.42.3.jar{{PWD}}/com.google.api_gax-httpjson-0.105.1.jar{{PWD}}/com.google.cloud_google-cloud-core-grpc-2.9.0.jar{{PWD}}/io.grpc_grpc-core-1.51.0.jar{{PWD}}/com.google.api_gax-2.20.1.jar{{PWD}}/com.google.api_gax-grpc-2.20.1.jar{{PWD}}/io.grpc_grpc-alts-1.51.0.jar{{PWD}}/io.grpc_grpc-g
rpclb-1.51.0.jar{{PWD}}/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar{{PWD}}/io.grpc_grpc-protobuf-1.51.0.jar{{PWD}}/com.google.auth_google-auth-library-credentials-1.13.0.jar{{PWD}}/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar{{PWD}}/com.google.api_api-common-2.2.2.jar{{PWD}}/javax.annotation_javax.annotation-api-1.3.2.jar{{PWD}}/io.opencensus_opencensus-api-0.31.1.jar{{PWD}}/io.grpc_grpc-context-1.51.0.jar{{PWD}}/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar{{PWD}}/com.google.protobuf_protobuf-java-3.21.10.jar{{PWD}}/com.google.protobuf_protobuf-java-util-3.21.10.jar{{PWD}}/com.google.api.grpc_proto-google-common-protos-2.11.0.jar{{PWD}}/org.threeten_threetenbp-1.6.4.jar{{PWD}}/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.fasterxml.jackson.core_jackson-core-2.14.1.jar{{PWD}}/com.google.code.findbugs_jsr305-3.0.2.jar{{PWD}}/io.grpc_grpc-api-1.51.0.jar{{PWD}}/io.grpc_grpc-auth-1.51.0.jar{{PWD}}/io.grpc_grpc-stub-1.51.0.jar{{PWD}}/org.checkerframework_checker-qual-3.28.0.jar{{PWD}}/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar{{PWD}}/io.grpc_grpc-protobuf-lite-1.51.0.jar{{PWD}}/com.google.android_annotations-4.1.1.4.jar{{PWD}}/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar{{PWD}}/io.grpc_grpc-netty-shaded-1.51.0.jar{{PWD}}/io.perfmark_perfmark-api-0.26.0.jar{{PWD}}/io.grpc_grpc-googleapis-1.51.0.jar{{PWD}}/io.grpc_grpc-xds-1.51.0.jar{{PWD}}/io.opencensus_opencensus-proto-0.2.0.jar{{PWD}}/io.grpc_grpc-services-1.51.0.jar{{PWD}}/com.google.re2j_re2j-1.6.jar{{PWD}}/dk.brics.automaton_automaton-1.11-8.jar{{PWD}}/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_HOSTS',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", + " ('spark.executor.id', 'driver'),\n", + " 
('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES',\n", + " 'http://hub-msca-bdp-dphub-students-rohitk-m:8088/proxy/application_1700703892135_0004'),\n", + " ('spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version', '2'),\n", + " ('spark.dynamicAllocation.maxExecutors', '10000'),\n", + " ('spark.yarn.dist.pyFiles',\n", + " 'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.
34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.googl
e.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.yarn.am.attemptFailuresValidityInterval', '1h'),\n", + " ('spark.app.name', 'Spark Updated Conf'),\n", + " ('spark.sql.catalogImplementation', 'hive'),\n", + " ('spark.executorEnv.OPENBLAS_NUM_THREADS', '1'),\n", + " ('spark.yarn.secondary.jars',\n", + " 
'com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,com.typesafe_config-1.4.2.jar,org.rocksdb_rocksdbjni-6.29.5.jar,com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,com.github.universal-automata_liblevenshtein-3.0.0.jar,com.google.cloud_google-cloud-storage-2.16.0.jar,com.navigamez_greex-1.0.jar,com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,it.unimi.dsi_fastutil-7.0.12.jar,org.projectlombok_lombok-1.16.8.jar,com.google.guava_guava-31.1-jre.jar,com.google.guava_failureaccess-1.0.1.jar,com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,com.google.errorprone_error_prone_annotations-2.16.jar,com.google.j2objc_j2objc-annotations-1.3.jar,com.google.http-client_google-http-client-1.42.3.jar,io.opencensus_opencensus-contrib-http-util-0.31.1.jar,com.google.http-client_google-http-client-jackson2-1.42.3.jar,com.google.http-client_google-http-client-gson-1.42.3.jar,com.google.api-client_google-api-client-2.1.1.jar,commons-codec_commons-codec-1.15.jar,com.google.oauth-client_google-oauth-client-1.34.1.jar,com.google.http-client_google-http-client-apache-v2-1.42.3.jar,com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,com.google.code.gson_gson-2.10.jar,com.google.cloud_google-cloud-core-2.9.0.jar,com.google.auto.value_auto-value-annotations-1.10.1.jar,com.google.cloud_google-cloud-core-http-2.9.0.jar,com.google.http-client_google-http-client-appengine-1.42.3.jar,com.google.api_gax-httpjson-0.105.1.jar,com.google.cloud_google-cloud-core-grpc-2.9.0.jar,io.grpc_grpc-core-1.51.0.jar,com.google.api_gax-2.20.1.jar,com.google.api_gax-grpc-2.20.1.jar,io.grpc_grpc-alts-1.51.0.jar,io.grpc_grpc-grpclb-1.51.0.jar,org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,io.grpc_grpc-protobuf-1.51.0.jar,com.google.auth_google-auth-library-credentials-1.13.0.jar,com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,com.google.api_api-common-2.2.2.jar,javax.annotation_javax.annotation-api-1.3.2.jar,io.open
census_opencensus-api-0.31.1.jar,io.grpc_grpc-context-1.51.0.jar,com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,com.google.protobuf_protobuf-java-3.21.10.jar,com.google.protobuf_protobuf-java-util-3.21.10.jar,com.google.api.grpc_proto-google-common-protos-2.11.0.jar,org.threeten_threetenbp-1.6.4.jar,com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,com.fasterxml.jackson.core_jackson-core-2.14.1.jar,com.google.code.findbugs_jsr305-3.0.2.jar,io.grpc_grpc-api-1.51.0.jar,io.grpc_grpc-auth-1.51.0.jar,io.grpc_grpc-stub-1.51.0.jar,org.checkerframework_checker-qual-3.28.0.jar,com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,io.grpc_grpc-protobuf-lite-1.51.0.jar,com.google.android_annotations-4.1.1.4.jar,org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,io.grpc_grpc-netty-shaded-1.51.0.jar,io.perfmark_perfmark-api-0.26.0.jar,io.grpc_grpc-googleapis-1.51.0.jar,io.grpc_grpc-xds-1.51.0.jar,io.opencensus_opencensus-proto-0.2.0.jar,io.grpc_grpc-services-1.51.0.jar,com.google.re2j_re2j-1.6.jar,dk.brics.automaton_automaton-1.11-8.jar,org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.history.fs.logDirectory',\n", + " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/73ad5d52-7eed-4ef0-a116-3e9fd02da220/spark-job-history'),\n", + " ('spark.ui.proxyBase', '/proxy/application_1700703892135_0004'),\n", + " ('spark.repl.local.jars',\n", + " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.sql.cbo.enabled', 'true'),\n", + " ('spark.yarn.dist.jars',\n", + " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.dataproc.sql.parquet.enableFooterCache', 'true'),\n", + " ('spark.driver.memory', '4g'),\n", + " ('spark.sql.warehouse.dir', 'file:/spark-warehouse'),\n", + " ('spark.yarn.executor.failuresValidityInterval', '1h'),\n", + " ('spark.app.id', 'application_1700703892135_0004'),\n", + " ('spark.yarn.am.memory', '640m'),\n", + " ('spark.yarn.historyServer.address',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m:18080'),\n", + " ('spark.cores.max', '4'),\n", + " ('spark.executor.cores', '4'),\n", + " ('spark.eventLog.dir',\n", + " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/73ad5d52-7eed-4ef0-a116-3e9fd02da220/spark-job-history'),\n", + " ('spark.jars.packages',\n", + " 'com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.0,graphframes:graphframes:0.8.2-spark3.1-s_2.12'),\n", + " ('spark.executor.instances', '2'),\n", + " ('spark.dataproc.listeners',\n", + " 'com.google.cloud.spark.performance.DataprocMetricsListener'),\n", + " ('spark.sql.autoBroadcastJoinThreshold', '90m'),\n", + " ('spark.serializer.objectStreamReset', 
'100'),\n", + " ('spark.submit.deployMode', 'client'),\n", + " ('spark.driver.appUIAddress',\n", + " 'http://hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal:42407'),\n", + " ('spark.sql.cbo.joinReorder.enabled', 'true'),\n", + " ('spark.shuffle.service.enabled', 'true'),\n", + " ('spark.scheduler.mode', 'FAIR'),\n", + " ('spark.sql.adaptive.enabled', 'true'),\n", + " ('spark.yarn.jars', 'local:/usr/lib/spark/jars/*'),\n", + " ('spark.scheduler.minRegisteredResourcesRatio', '0.0'),\n", + " ('spark.master', 'yarn'),\n", + " ('spark.ui.port', '0'),\n", + " ('spark.rpc.message.maxSize', '512'),\n", + " ('spark.rdd.compress', 'True'),\n", + " ('spark.task.maxFailures', '10'),\n", + " ('spark.yarn.isPython', 'true'),\n", + " ('spark.dynamicAllocation.enabled', 'true'),\n", + " ('spark.ui.showConsoleProgress', 'true')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# read in packages create spark environment\n", "from pyspark.sql import SparkSession\n", @@ -24,7 +135,7 @@ "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", - "spark = SparkSession.builder.appName('supervised').getOrCreate()\n", + "spark = SparkSession.builder.appName('unsupervised').getOrCreate()\n", "\n", "#change configuration settings on Spark \n", "conf = spark.sparkContext._conf.setAll([('spark.executor.memory', '4g'), ('spark.app.name', 'Spark Updated Conf'), ('spark.executor.cores', '4'), ('spark.cores.max', '4'), ('spark.driver.memory','4g')])\n", @@ -43,10 +154,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "b98781e4-a2cd-4da2-aa73-70de31956265", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "| ID| start_timestamp| end_timestamp|seconds|miles|pickup_tract|dropoff_tract|pickup_area|dropoff_area|Fare|Tip|total| pickup_lat| pickup_lon| dropoff_lat| dropoff_lon|month|day_of_month|hour|day|\n", + "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "|625e77ae6e0ff7191...|2018-11-06 19:00:00|2018-11-06 19:15:00| 1142| 5.8| 17031063400| 17031010400| 6| 1|12.5| 0| 15.0|41.9346591566|-87.6467297286| 42.004764559| -87.659122427| 11| 6| 19| 3|\n", + "|62945fdb2e70957f0...|2018-11-06 19:00:00|2018-11-06 19:00:00| 341| 1.2| 17031081800| 17031833000| 8| 28| 5.0| 0| 7.5|41.8932163595|-87.6378442095|41.8852813201|-87.6572331997| 11| 6| 19| 3|\n", + "|6dc03f91e4480d237...|2018-11-06 19:00:00|2018-11-06 19:00:00| 558| 1.2| 17031070400| 17031061500| 7| 6| 7.5| 0| 10.3|41.9289672664|-87.6561568309|41.9452823311|-87.6615450961| 11| 6| 19| 3|\n", + "|773894079a526afa1...|2018-11-06 19:00:00|2018-11-06 19:30:00| 1047| 2.8| 17031832200| 17031062100| 22| 6|10.0| 2| 14.5|41.9204515116|-87.6799547678|41.9426918444|-87.6517705068| 11| 6| 19| 3|\n", + "|7acf0a7f2edfbe546...|2018-11-06 19:00:00|2018-11-06 19:00:00| 502| 1.3| 17031839100| 17031081700| 32| 8| 2.5| 0| 5.0|41.8809944707|-87.6327464887|41.8920421365|-87.6318639497| 11| 6| 19| 3|\n", + "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], "source": [ "# read in rideshare data for 
all years, concatenate, create appropriate partitioning\n", "# we are dropping 2020 because covid will affect the performance of our model\n", @@ -63,13 +199,1741 @@ "df_all = df_2018.union(df_2019).union(df_2021).union(df_2022).union(df_2023)\n", "df_all.show(5)" ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "978c6a20-72a3-4a7e-a588-4e1ad2386915", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Partitions: 544\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 32:=====================================================>(538 + 6) / 544]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------+------+\n", + "|partitionId| count|\n", + "+-----------+------+\n", + "| 42|305254|\n", + "| 41|305316|\n", + "| 40|305420|\n", + "| 38|305471|\n", + "| 39|305480|\n", + "| 37|305618|\n", + "| 36|305676|\n", + "| 35|305871|\n", + "| 34|305890|\n", + "| 33|305962|\n", + "| 32|305971|\n", + "| 31|306010|\n", + "| 29|306031|\n", + "| 30|306038|\n", + "| 28|306086|\n", + "| 27|306127|\n", + "| 26|306402|\n", + "| 25|306467|\n", + "| 24|306633|\n", + "| 23|306731|\n", + "| 22|307226|\n", + "| 243|328837|\n", + "| 242|328975|\n", + "| 241|329131|\n", + "| 240|329163|\n", + "| 239|329209|\n", + "| 237|329245|\n", + "| 238|329263|\n", + "| 235|329263|\n", + "| 234|329311|\n", + "| 236|329315|\n", + "| 232|329332|\n", + "| 233|329344|\n", + "| 231|329373|\n", + "| 228|329389|\n", + "| 229|329390|\n", + "| 227|329399|\n", + "| 226|329410|\n", + "| 225|329410|\n", + "| 224|329418|\n", + "| 230|329427|\n", + "| 223|329428|\n", + "| 220|329461|\n", + "| 222|329481|\n", + "| 221|329505|\n", + "| 217|329507|\n", + "| 218|329513|\n", + "| 219|329519|\n", + "| 216|329523|\n", + "| 214|329533|\n", + "| 213|329555|\n", + "| 215|329574|\n", + "| 211|329587|\n", + "| 212|329591|\n", + "| 208|329607|\n", + "| 210|329623|\n", + "| 206|329624|\n", + "| 
209|329630|\n", + "| 207|329633|\n", + "| 205|329646|\n", + "| 202|329654|\n", + "| 204|329673|\n", + "| 203|329678|\n", + "| 194|329704|\n", + "| 201|329708|\n", + "| 200|329712|\n", + "| 191|329717|\n", + "| 189|329728|\n", + "| 188|329730|\n", + "| 199|329732|\n", + "| 193|329732|\n", + "| 198|329739|\n", + "| 190|329746|\n", + "| 195|329748|\n", + "| 197|329786|\n", + "| 186|329786|\n", + "| 196|329786|\n", + "| 187|329787|\n", + "| 192|329793|\n", + "| 183|329804|\n", + "| 184|329805|\n", + "| 179|329807|\n", + "| 182|329814|\n", + "| 181|329819|\n", + "| 185|329843|\n", + "| 178|329849|\n", + "| 177|329861|\n", + "| 180|329865|\n", + "| 172|329866|\n", + "| 173|329871|\n", + "| 171|329879|\n", + "| 175|329889|\n", + "| 174|329892|\n", + "| 176|329900|\n", + "| 170|329937|\n", + "| 168|329956|\n", + "| 162|329961|\n", + "| 167|329962|\n", + "| 164|329965|\n", + "| 169|329974|\n", + "| 165|329980|\n", + "| 163|329985|\n", + "| 159|329993|\n", + "| 166|330000|\n", + "| 158|330005|\n", + "| 161|330014|\n", + "| 157|330021|\n", + "| 160|330027|\n", + "| 156|330059|\n", + "| 154|330066|\n", + "| 147|330070|\n", + "| 153|330083|\n", + "| 150|330084|\n", + "| 155|330084|\n", + "| 146|330089|\n", + "| 152|330090|\n", + "| 149|330099|\n", + "| 151|330101|\n", + "| 148|330103|\n", + "| 144|330116|\n", + "| 145|330129|\n", + "| 140|330133|\n", + "| 143|330135|\n", + "| 141|330139|\n", + "| 142|330148|\n", + "| 139|330159|\n", + "| 135|330178|\n", + "| 137|330181|\n", + "| 132|330186|\n", + "| 138|330189|\n", + "| 136|330205|\n", + "| 129|330208|\n", + "| 133|330219|\n", + "| 125|330220|\n", + "| 128|330227|\n", + "| 134|330230|\n", + "| 127|330236|\n", + "| 130|330243|\n", + "| 124|330245|\n", + "| 117|330249|\n", + "| 131|330252|\n", + "| 121|330255|\n", + "| 122|330258|\n", + "| 126|330268|\n", + "| 123|330270|\n", + "| 118|330275|\n", + "| 115|330282|\n", + "| 113|330282|\n", + "| 120|330296|\n", + "| 112|330297|\n", + "| 119|330306|\n", + "| 114|330308|\n", + "| 
104|330338|\n", + "| 116|330339|\n", + "| 109|330355|\n", + "| 105|330363|\n", + "| 111|330366|\n", + "| 110|330372|\n", + "| 108|330383|\n", + "| 107|330385|\n", + "| 106|330402|\n", + "| 96|330413|\n", + "| 102|330415|\n", + "| 100|330418|\n", + "| 98|330418|\n", + "| 101|330419|\n", + "| 94|330425|\n", + "| 97|330426|\n", + "| 99|330430|\n", + "| 103|330430|\n", + "| 95|330436|\n", + "| 90|330456|\n", + "| 92|330470|\n", + "| 88|330471|\n", + "| 91|330474|\n", + "| 93|330477|\n", + "| 87|330494|\n", + "| 86|330501|\n", + "| 89|330512|\n", + "| 84|330524|\n", + "| 82|330550|\n", + "| 80|330570|\n", + "| 85|330570|\n", + "| 81|330579|\n", + "| 83|330585|\n", + "| 78|330622|\n", + "| 79|330625|\n", + "| 76|330625|\n", + "| 75|330642|\n", + "| 77|330646|\n", + "| 71|330651|\n", + "| 74|330653|\n", + "| 73|330667|\n", + "| 70|330690|\n", + "| 65|330704|\n", + "| 72|330709|\n", + "| 67|330721|\n", + "| 66|330724|\n", + "| 69|330737|\n", + "| 62|330758|\n", + "| 64|330762|\n", + "| 63|330762|\n", + "| 68|330766|\n", + "| 60|330782|\n", + "| 59|330784|\n", + "| 56|330801|\n", + "| 57|330805|\n", + "| 61|330807|\n", + "| 58|330837|\n", + "| 53|330868|\n", + "| 55|330869|\n", + "| 54|330885|\n", + "| 52|330918|\n", + "| 50|330944|\n", + "| 51|330963|\n", + "| 49|331028|\n", + "| 48|331034|\n", + "| 47|331050|\n", + "| 46|331114|\n", + "| 45|331284|\n", + "| 44|331416|\n", + "| 543|364094|\n", + "| 542|364374|\n", + "| 541|364493|\n", + "| 537|364581|\n", + "| 538|364599|\n", + "| 539|364616|\n", + "| 540|364617|\n", + "| 536|364654|\n", + "| 534|364709|\n", + "| 535|364756|\n", + "| 532|364784|\n", + "| 533|364810|\n", + "| 529|364899|\n", + "| 530|364903|\n", + "| 531|364944|\n", + "| 528|364957|\n", + "| 527|364961|\n", + "| 524|364971|\n", + "| 525|364988|\n", + "| 526|365006|\n", + "| 522|365011|\n", + "| 523|365051|\n", + "| 521|365057|\n", + "| 520|365079|\n", + "| 518|365083|\n", + "| 517|365090|\n", + "| 519|365097|\n", + "| 516|365122|\n", + "| 514|365165|\n", + 
"| 515|365179|\n", + "| 513|365224|\n", + "| 509|365252|\n", + "| 506|365253|\n", + "| 511|365255|\n", + "| 508|365272|\n", + "| 510|365277|\n", + "| 512|365278|\n", + "| 507|365302|\n", + "| 505|365347|\n", + "| 502|365377|\n", + "| 503|365394|\n", + "| 504|365395|\n", + "| 501|365409|\n", + "| 500|365431|\n", + "| 498|365447|\n", + "| 499|365454|\n", + "| 497|365519|\n", + "| 496|365528|\n", + "| 495|365536|\n", + "| 492|365541|\n", + "| 489|365547|\n", + "| 488|365552|\n", + "| 487|365554|\n", + "| 490|365569|\n", + "| 493|365574|\n", + "| 484|365576|\n", + "| 494|365595|\n", + "| 485|365602|\n", + "| 486|365622|\n", + "| 491|365622|\n", + "| 483|365650|\n", + "| 482|365684|\n", + "| 481|365705|\n", + "| 479|365750|\n", + "| 478|365773|\n", + "| 477|365793|\n", + "| 480|365801|\n", + "| 475|365806|\n", + "| 474|365806|\n", + "| 473|365828|\n", + "| 476|365846|\n", + "| 472|365909|\n", + "| 471|365965|\n", + "| 470|365975|\n", + "| 469|366026|\n", + "| 466|366051|\n", + "| 467|366057|\n", + "| 468|366080|\n", + "| 464|366105|\n", + "| 465|366117|\n", + "| 462|366150|\n", + "| 463|366160|\n", + "| 458|366193|\n", + "| 461|366200|\n", + "| 460|366214|\n", + "| 459|366217|\n", + "| 456|366297|\n", + "| 457|366320|\n", + "| 455|366371|\n", + "| 454|366383|\n", + "| 453|366422|\n", + "| 452|366461|\n", + "| 451|366589|\n", + "| 450|366617|\n", + "| 449|366758|\n", + "| 448|366799|\n", + "| 447|366883|\n", + "| 446|366901|\n", + "| 445|366940|\n", + "| 444|367122|\n", + "| 21|380513|\n", + "| 20|380565|\n", + "| 19|380749|\n", + "| 18|381028|\n", + "| 17|381069|\n", + "| 16|381243|\n", + "| 15|381263|\n", + "| 14|381438|\n", + "| 13|381470|\n", + "| 12|381544|\n", + "| 11|381646|\n", + "| 10|381711|\n", + "| 8|381721|\n", + "| 9|381753|\n", + "| 7|381759|\n", + "| 6|381763|\n", + "| 5|381783|\n", + "| 4|381827|\n", + "| 3|381971|\n", + "| 1|382022|\n", + "| 2|382029|\n", + "| 0|382095|\n", + "| 342|420259|\n", + "| 343|420346|\n", + "| 341|420485|\n", + "| 
340|420525|\n", + "| 339|420707|\n", + "| 336|421031|\n", + "| 337|421040|\n", + "| 338|421052|\n", + "| 334|421107|\n", + "| 335|421142|\n", + "| 333|421374|\n", + "| 330|421440|\n", + "| 332|421479|\n", + "| 331|421531|\n", + "| 327|421574|\n", + "| 328|421603|\n", + "| 329|421610|\n", + "| 326|421612|\n", + "| 322|421670|\n", + "| 320|421675|\n", + "| 325|421679|\n", + "| 324|421681|\n", + "| 319|421687|\n", + "| 323|421687|\n", + "| 321|421699|\n", + "| 318|421751|\n", + "| 315|421832|\n", + "| 310|421867|\n", + "| 316|421897|\n", + "| 312|421903|\n", + "| 317|421911|\n", + "| 314|421918|\n", + "| 313|421920|\n", + "| 311|421950|\n", + "| 309|421972|\n", + "| 307|421988|\n", + "| 308|422019|\n", + "| 305|422072|\n", + "| 303|422083|\n", + "| 306|422091|\n", + "| 304|422095|\n", + "| 302|422097|\n", + "| 298|422103|\n", + "| 300|422114|\n", + "| 301|422116|\n", + "| 295|422134|\n", + "| 296|422155|\n", + "| 299|422155|\n", + "| 290|422185|\n", + "| 297|422193|\n", + "| 294|422194|\n", + "| 292|422207|\n", + "| 291|422218|\n", + "| 293|422236|\n", + "| 288|422238|\n", + "| 286|422255|\n", + "| 289|422265|\n", + "| 287|422266|\n", + "| 285|422305|\n", + "| 283|422307|\n", + "| 284|422346|\n", + "| 282|422350|\n", + "| 281|422354|\n", + "| 280|422372|\n", + "| 279|422415|\n", + "| 278|422498|\n", + "| 277|422501|\n", + "| 276|422508|\n", + "| 275|422549|\n", + "| 274|422557|\n", + "| 273|422591|\n", + "| 272|422625|\n", + "| 270|422634|\n", + "| 269|422671|\n", + "| 268|422673|\n", + "| 271|422692|\n", + "| 267|422694|\n", + "| 265|422761|\n", + "| 262|422777|\n", + "| 260|422788|\n", + "| 263|422795|\n", + "| 266|422803|\n", + "| 264|422807|\n", + "| 258|422838|\n", + "| 259|422839|\n", + "| 261|422841|\n", + "| 257|422852|\n", + "| 256|422891|\n", + "| 252|422904|\n", + "| 255|422925|\n", + "| 254|422986|\n", + "| 253|423003|\n", + "| 250|423197|\n", + "| 251|423202|\n", + "| 248|423231|\n", + "| 249|423262|\n", + "| 246|423376|\n", + "| 247|423402|\n", + "| 
245|423403|\n", + "| 244|423762|\n", + "| 43|457702|\n", + "| 443|569570|\n", + "| 442|570154|\n", + "| 441|570301|\n", + "| 440|570372|\n", + "| 439|570572|\n", + "| 438|570655|\n", + "| 436|570763|\n", + "| 437|570781|\n", + "| 434|570870|\n", + "| 435|570872|\n", + "| 433|570953|\n", + "| 432|570979|\n", + "| 431|571069|\n", + "| 429|571096|\n", + "| 430|571097|\n", + "| 428|571127|\n", + "| 427|571153|\n", + "| 426|571185|\n", + "| 425|571201|\n", + "| 424|571286|\n", + "| 423|571425|\n", + "| 422|571449|\n", + "| 417|571506|\n", + "| 420|571528|\n", + "| 421|571532|\n", + "| 419|571553|\n", + "| 418|571585|\n", + "| 416|571595|\n", + "| 414|571645|\n", + "| 415|571657|\n", + "| 413|571742|\n", + "| 412|571766|\n", + "| 411|571796|\n", + "| 409|571842|\n", + "| 410|571847|\n", + "| 407|571874|\n", + "| 408|571913|\n", + "| 406|571925|\n", + "| 405|571966|\n", + "| 404|571983|\n", + "| 402|571993|\n", + "| 403|572020|\n", + "| 401|572123|\n", + "| 397|572181|\n", + "| 400|572182|\n", + "| 399|572183|\n", + "| 398|572189|\n", + "| 396|572212|\n", + "| 395|572244|\n", + "| 393|572249|\n", + "| 394|572276|\n", + "| 392|572302|\n", + "| 391|572344|\n", + "| 390|572361|\n", + "| 389|572382|\n", + "| 388|572394|\n", + "| 387|572428|\n", + "| 386|572438|\n", + "| 385|572493|\n", + "| 383|572545|\n", + "| 384|572565|\n", + "| 382|572569|\n", + "| 381|572600|\n", + "| 380|572604|\n", + "| 379|572634|\n", + "| 378|572647|\n", + "| 376|572742|\n", + "| 375|572742|\n", + "| 377|572755|\n", + "| 374|572798|\n", + "| 372|572800|\n", + "| 373|572816|\n", + "| 371|572868|\n", + "| 370|572895|\n", + "| 369|572907|\n", + "| 368|572924|\n", + "| 367|572957|\n", + "| 366|573022|\n", + "| 364|573102|\n", + "| 365|573104|\n", + "| 362|573132|\n", + "| 363|573145|\n", + "| 361|573173|\n", + "| 360|573187|\n", + "| 358|573262|\n", + "| 359|573270|\n", + "| 357|573334|\n", + "| 356|573372|\n", + "| 355|573425|\n", + "| 354|573556|\n", + "| 353|573584|\n", + "| 352|573658|\n", + "| 
351|573676|\n", + "| 350|573781|\n", + "| 349|573977|\n", + "| 347|574013|\n", + "| 348|574040|\n", + "| 346|574185|\n", + "| 345|574318|\n", + "| 344|574727|\n", + "+-----------+------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "#display number of records by partition\n", + "def displaypartitions(df):\n", + " #number of records by partition\n", + " num = df.rdd.getNumPartitions()\n", + " print(\"Partitions:\", num)\n", + " df.withColumn(\"partitionId\", F.spark_partition_id())\\\n", + " .groupBy(\"partitionId\")\\\n", + " .count()\\\n", + " .orderBy(F.asc(\"count\"))\\\n", + " .show(num)\n", + "\n", + "df_all.rdd.getNumPartitions()\n", + "displaypartitions(df_all)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "8c914559-481c-4dbe-8438-91eeb2795b54", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 35:=====================================================>(543 + 1) / 544]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Partitions: 600\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 38:=================================================> (565 + 32) / 600]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------+------+\n", + "|partitionId| count|\n", + "+-----------+------+\n", + "| 407|362152|\n", + "| 404|362153|\n", + "| 403|362153|\n", + "| 398|362153|\n", + "| 406|362153|\n", + "| 405|362153|\n", + "| 408|362153|\n", + "| 399|362153|\n", + "| 136|362154|\n", + "| 137|362154|\n", + "| 400|362154|\n", + "| 397|362154|\n", + "| 396|362154|\n", + "| 409|362154|\n", + "| 135|362154|\n", + "| 535|362154|\n", + "| 138|362154|\n", + "| 401|362154|\n", + "| 69|362155|\n", + "| 140|362155|\n", + "| 537|362155|\n", + "| 108|362155|\n", + "| 539|362155|\n", + "| 146|362155|\n", + "| 538|362155|\n", + "| 534|362155|\n", + 
"| 402|362155|\n", + "| 148|362155|\n", + "| 536|362155|\n", + "| 106|362155|\n", + "| 410|362155|\n", + "| 151|362155|\n", + "| 107|362155|\n", + "| 70|362155|\n", + "| 105|362155|\n", + "| 147|362155|\n", + "| 144|362155|\n", + "| 134|362155|\n", + "| 139|362155|\n", + "| 150|362155|\n", + "| 84|362156|\n", + "| 395|362156|\n", + "| 92|362156|\n", + "| 143|362156|\n", + "| 160|362156|\n", + "| 394|362156|\n", + "| 411|362156|\n", + "| 109|362156|\n", + "| 72|362156|\n", + "| 145|362156|\n", + "| 93|362156|\n", + "| 94|362156|\n", + "| 532|362156|\n", + "| 142|362156|\n", + "| 68|362156|\n", + "| 71|362156|\n", + "| 101|362156|\n", + "| 412|362156|\n", + "| 529|362156|\n", + "| 149|362156|\n", + "| 531|362156|\n", + "| 152|362156|\n", + "| 530|362156|\n", + "| 67|362156|\n", + "| 89|362156|\n", + "| 141|362157|\n", + "| 85|362157|\n", + "| 97|362157|\n", + "| 416|362157|\n", + "| 130|362157|\n", + "| 419|362157|\n", + "| 111|362157|\n", + "| 90|362157|\n", + "| 98|362157|\n", + "| 540|362157|\n", + "| 82|362157|\n", + "| 75|362157|\n", + "| 133|362157|\n", + "| 79|362157|\n", + "| 91|362157|\n", + "| 317|362157|\n", + "| 414|362157|\n", + "| 66|362157|\n", + "| 153|362157|\n", + "| 162|362157|\n", + "| 315|362157|\n", + "| 102|362157|\n", + "| 413|362157|\n", + "| 83|362157|\n", + "| 438|362158|\n", + "| 393|362158|\n", + "| 77|362158|\n", + "| 96|362158|\n", + "| 527|362158|\n", + "| 163|362158|\n", + "| 87|362158|\n", + "| 322|362158|\n", + "| 100|362158|\n", + "| 103|362158|\n", + "| 81|362158|\n", + "| 64|362158|\n", + "| 80|362158|\n", + "| 112|362158|\n", + "| 320|362158|\n", + "| 418|362158|\n", + "| 417|362158|\n", + "| 415|362158|\n", + "| 95|362158|\n", + "| 321|362158|\n", + "| 73|362158|\n", + "| 427|362158|\n", + "| 65|362158|\n", + "| 533|362158|\n", + "| 524|362158|\n", + "| 390|362158|\n", + "| 76|362158|\n", + "| 319|362158|\n", + "| 525|362158|\n", + "| 161|362158|\n", + "| 318|362158|\n", + "| 316|362158|\n", + "| 164|362158|\n", + "| 
99|362158|\n", + "| 420|362158|\n", + "| 131|362158|\n", + "| 88|362158|\n", + "| 74|362158|\n", + "| 104|362158|\n", + "| 541|362158|\n", + "| 110|362158|\n", + "| 526|362158|\n", + "| 528|362158|\n", + "| 86|362158|\n", + "| 78|362159|\n", + "| 132|362159|\n", + "| 583|362159|\n", + "| 545|362159|\n", + "| 314|362159|\n", + "| 449|362159|\n", + "| 543|362159|\n", + "| 441|362159|\n", + "| 430|362159|\n", + "| 434|362159|\n", + "| 439|362159|\n", + "| 580|362159|\n", + "| 129|362159|\n", + "| 542|362159|\n", + "| 431|362159|\n", + "| 579|362159|\n", + "| 159|362159|\n", + "| 113|362159|\n", + "| 165|362159|\n", + "| 426|362159|\n", + "| 157|362159|\n", + "| 156|362159|\n", + "| 578|362159|\n", + "| 567|362159|\n", + "| 118|362159|\n", + "| 154|362159|\n", + "| 581|362159|\n", + "| 433|362159|\n", + "| 566|362159|\n", + "| 423|362159|\n", + "| 155|362159|\n", + "| 158|362159|\n", + "| 440|362159|\n", + "| 586|362160|\n", + "| 585|362160|\n", + "| 597|362160|\n", + "| 392|362160|\n", + "| 62|362160|\n", + "| 3|362160|\n", + "| 425|362160|\n", + "| 429|362160|\n", + "| 116|362160|\n", + "| 324|362160|\n", + "| 584|362160|\n", + "| 180|362160|\n", + "| 596|362160|\n", + "| 312|362160|\n", + "| 389|362160|\n", + "| 565|362160|\n", + "| 544|362160|\n", + "| 588|362160|\n", + "| 448|362160|\n", + "| 166|362160|\n", + "| 569|362160|\n", + "| 446|362160|\n", + "| 114|362160|\n", + "| 63|362160|\n", + "| 385|362160|\n", + "| 117|362160|\n", + "| 428|362160|\n", + "| 548|362160|\n", + "| 595|362160|\n", + "| 328|362160|\n", + "| 587|362160|\n", + "| 323|362160|\n", + "| 559|362160|\n", + "| 386|362160|\n", + "| 432|362160|\n", + "| 582|362160|\n", + "| 391|362160|\n", + "| 561|362160|\n", + "| 309|362160|\n", + "| 421|362160|\n", + "| 2|362160|\n", + "| 547|362160|\n", + "| 598|362160|\n", + "| 424|362160|\n", + "| 435|362160|\n", + "| 568|362160|\n", + "| 115|362160|\n", + "| 313|362160|\n", + "| 387|362160|\n", + "| 442|362160|\n", + "| 437|362160|\n", + "| 13|362160|\n", 
+ "| 436|362160|\n", + "| 444|362160|\n", + "| 571|362160|\n", + "| 589|362160|\n", + "| 422|362160|\n", + "| 26|362161|\n", + "| 8|362161|\n", + "| 332|362161|\n", + "| 577|362161|\n", + "| 443|362161|\n", + "| 219|362161|\n", + "| 591|362161|\n", + "| 376|362161|\n", + "| 178|362161|\n", + "| 447|362161|\n", + "| 546|362161|\n", + "| 564|362161|\n", + "| 337|362161|\n", + "| 4|362161|\n", + "| 372|362161|\n", + "| 59|362161|\n", + "| 451|362161|\n", + "| 450|362161|\n", + "| 27|362161|\n", + "| 570|362161|\n", + "| 217|362161|\n", + "| 329|362161|\n", + "| 560|362161|\n", + "| 326|362161|\n", + "| 388|362161|\n", + "| 572|362161|\n", + "| 331|362161|\n", + "| 24|362161|\n", + "| 167|362161|\n", + "| 25|362161|\n", + "| 15|362161|\n", + "| 120|362161|\n", + "| 336|362161|\n", + "| 181|362161|\n", + "| 552|362161|\n", + "| 338|362161|\n", + "| 325|362161|\n", + "| 377|362161|\n", + "| 179|362161|\n", + "| 11|362161|\n", + "| 523|362161|\n", + "| 127|362161|\n", + "| 61|362161|\n", + "| 311|362161|\n", + "| 310|362161|\n", + "| 119|362161|\n", + "| 562|362161|\n", + "| 128|362161|\n", + "| 549|362161|\n", + "| 7|362161|\n", + "| 361|362161|\n", + "| 594|362161|\n", + "| 558|362161|\n", + "| 445|362161|\n", + "| 22|362162|\n", + "| 521|362162|\n", + "| 333|362162|\n", + "| 453|362162|\n", + "| 48|362162|\n", + "| 19|362162|\n", + "| 168|362162|\n", + "| 576|362162|\n", + "| 363|362162|\n", + "| 32|362162|\n", + "| 362|362162|\n", + "| 599|362162|\n", + "| 122|362162|\n", + "| 573|362162|\n", + "| 384|362162|\n", + "| 575|362162|\n", + "| 126|362162|\n", + "| 379|362162|\n", + "| 374|362162|\n", + "| 20|362162|\n", + "| 327|362162|\n", + "| 18|362162|\n", + "| 28|362162|\n", + "| 593|362162|\n", + "| 35|362162|\n", + "| 330|362162|\n", + "| 5|362162|\n", + "| 381|362162|\n", + "| 216|362162|\n", + "| 380|362162|\n", + "| 1|362162|\n", + "| 339|362162|\n", + "| 60|362162|\n", + "| 21|362162|\n", + "| 550|362162|\n", + "| 378|362162|\n", + "| 228|362162|\n", + "| 
12|362162|\n", + "| 9|362162|\n", + "| 17|362162|\n", + "| 382|362162|\n", + "| 121|362162|\n", + "| 221|362162|\n", + "| 590|362162|\n", + "| 375|362162|\n", + "| 522|362162|\n", + "| 373|362162|\n", + "| 14|362162|\n", + "| 182|362162|\n", + "| 563|362162|\n", + "| 383|362162|\n", + "| 218|362162|\n", + "| 10|362162|\n", + "| 16|362162|\n", + "| 23|362162|\n", + "| 40|362163|\n", + "| 57|362163|\n", + "| 340|362163|\n", + "| 56|362163|\n", + "| 41|362163|\n", + "| 33|362163|\n", + "| 557|362163|\n", + "| 359|362163|\n", + "| 6|362163|\n", + "| 234|362163|\n", + "| 50|362163|\n", + "| 171|362163|\n", + "| 177|362163|\n", + "| 556|362163|\n", + "| 173|362163|\n", + "| 229|362163|\n", + "| 176|362163|\n", + "| 520|362163|\n", + "| 49|362163|\n", + "| 206|362163|\n", + "| 205|362163|\n", + "| 452|362163|\n", + "| 308|362163|\n", + "| 574|362163|\n", + "| 592|362163|\n", + "| 334|362163|\n", + "| 364|362163|\n", + "| 125|362163|\n", + "| 455|362163|\n", + "| 204|362163|\n", + "| 169|362163|\n", + "| 554|362163|\n", + "| 215|362163|\n", + "| 454|362163|\n", + "| 123|362163|\n", + "| 42|362163|\n", + "| 466|362163|\n", + "| 172|362163|\n", + "| 170|362163|\n", + "| 211|362163|\n", + "| 208|362163|\n", + "| 36|362163|\n", + "| 551|362163|\n", + "| 55|362163|\n", + "| 553|362163|\n", + "| 220|362163|\n", + "| 366|362163|\n", + "| 214|362163|\n", + "| 29|362163|\n", + "| 335|362163|\n", + "| 207|362163|\n", + "| 174|362163|\n", + "| 341|362163|\n", + "| 210|362163|\n", + "| 37|362163|\n", + "| 124|362163|\n", + "| 0|362163|\n", + "| 365|362163|\n", + "| 39|362163|\n", + "| 519|362164|\n", + "| 52|362164|\n", + "| 297|362164|\n", + "| 200|362164|\n", + "| 471|362164|\n", + "| 555|362164|\n", + "| 175|362164|\n", + "| 30|362164|\n", + "| 222|362164|\n", + "| 368|362164|\n", + "| 31|362164|\n", + "| 305|362164|\n", + "| 295|362164|\n", + "| 465|362164|\n", + "| 51|362164|\n", + "| 209|362164|\n", + "| 371|362164|\n", + "| 54|362164|\n", + "| 213|362164|\n", + "| 
230|362164|\n", + "| 469|362164|\n", + "| 203|362164|\n", + "| 201|362164|\n", + "| 342|362164|\n", + "| 473|362164|\n", + "| 53|362164|\n", + "| 199|362164|\n", + "| 223|362164|\n", + "| 294|362164|\n", + "| 183|362164|\n", + "| 456|362164|\n", + "| 34|362164|\n", + "| 212|362164|\n", + "| 467|362164|\n", + "| 358|362164|\n", + "| 360|362164|\n", + "| 367|362164|\n", + "| 291|362164|\n", + "| 58|362164|\n", + "| 184|362164|\n", + "| 475|362164|\n", + "| 468|362164|\n", + "| 44|362165|\n", + "| 307|362165|\n", + "| 462|362165|\n", + "| 370|362165|\n", + "| 235|362165|\n", + "| 285|362165|\n", + "| 518|362165|\n", + "| 343|362165|\n", + "| 301|362165|\n", + "| 224|362165|\n", + "| 227|362165|\n", + "| 292|362165|\n", + "| 190|362165|\n", + "| 300|362165|\n", + "| 472|362165|\n", + "| 470|362165|\n", + "| 202|362165|\n", + "| 302|362165|\n", + "| 357|362165|\n", + "| 298|362165|\n", + "| 197|362165|\n", + "| 187|362165|\n", + "| 474|362165|\n", + "| 369|362165|\n", + "| 186|362165|\n", + "| 293|362165|\n", + "| 236|362165|\n", + "| 185|362165|\n", + "| 38|362165|\n", + "| 306|362165|\n", + "| 43|362165|\n", + "| 231|362165|\n", + "| 226|362165|\n", + "| 232|362165|\n", + "| 233|362165|\n", + "| 188|362165|\n", + "| 304|362165|\n", + "| 296|362165|\n", + "| 47|362165|\n", + "| 464|362165|\n", + "| 46|362165|\n", + "| 476|362165|\n", + "| 457|362165|\n", + "| 517|362165|\n", + "| 460|362166|\n", + "| 461|362166|\n", + "| 348|362166|\n", + "| 241|362166|\n", + "| 350|362166|\n", + "| 237|362166|\n", + "| 477|362166|\n", + "| 458|362166|\n", + "| 286|362166|\n", + "| 463|362166|\n", + "| 225|362166|\n", + "| 189|362166|\n", + "| 344|362166|\n", + "| 198|362166|\n", + "| 486|362166|\n", + "| 191|362166|\n", + "| 45|362166|\n", + "| 356|362166|\n", + "| 482|362166|\n", + "| 299|362166|\n", + "| 287|362166|\n", + "| 303|362166|\n", + "| 459|362166|\n", + "| 238|362166|\n", + "| 278|362167|\n", + "| 240|362167|\n", + "| 478|362167|\n", + "| 347|362167|\n", + "| 
508|362167|\n", + "| 290|362167|\n", + "| 289|362167|\n", + "| 288|362167|\n", + "| 346|362167|\n", + "| 351|362167|\n", + "| 349|362167|\n", + "| 196|362167|\n", + "| 283|362167|\n", + "| 193|362167|\n", + "| 239|362167|\n", + "| 192|362167|\n", + "| 480|362167|\n", + "| 284|362167|\n", + "| 483|362167|\n", + "| 506|362168|\n", + "| 280|362168|\n", + "| 267|362168|\n", + "| 516|362168|\n", + "| 353|362168|\n", + "| 481|362168|\n", + "| 194|362168|\n", + "| 269|362168|\n", + "| 265|362168|\n", + "| 484|362168|\n", + "| 263|362168|\n", + "| 485|362168|\n", + "| 262|362168|\n", + "| 354|362168|\n", + "| 509|362168|\n", + "| 479|362168|\n", + "| 282|362168|\n", + "| 264|362168|\n", + "| 268|362168|\n", + "| 507|362168|\n", + "| 279|362168|\n", + "| 355|362168|\n", + "| 345|362168|\n", + "| 512|362168|\n", + "| 243|362168|\n", + "| 515|362168|\n", + "| 242|362168|\n", + "| 281|362168|\n", + "| 513|362168|\n", + "| 487|362168|\n", + "| 510|362168|\n", + "| 352|362168|\n", + "| 488|362168|\n", + "| 270|362169|\n", + "| 514|362169|\n", + "| 511|362169|\n", + "| 261|362169|\n", + "| 245|362169|\n", + "| 195|362169|\n", + "| 258|362170|\n", + "| 276|362170|\n", + "| 503|362170|\n", + "| 273|362170|\n", + "| 244|362170|\n", + "| 277|362170|\n", + "| 274|362170|\n", + "| 489|362170|\n", + "| 260|362170|\n", + "| 275|362170|\n", + "| 247|362170|\n", + "| 271|362170|\n", + "| 246|362170|\n", + "| 272|362170|\n", + "| 266|362170|\n", + "| 490|362170|\n", + "| 250|362171|\n", + "| 504|362171|\n", + "| 491|362171|\n", + "| 505|362171|\n", + "| 249|362171|\n", + "| 492|362171|\n", + "| 248|362171|\n", + "| 493|362172|\n", + "| 259|362172|\n", + "| 502|362172|\n", + "| 501|362173|\n", + "| 254|362173|\n", + "| 253|362173|\n", + "| 251|362173|\n", + "| 494|362174|\n", + "| 255|362174|\n", + "| 252|362174|\n", + "| 495|362174|\n", + "| 256|362174|\n", + "| 500|362174|\n", + "| 497|362175|\n", + "| 257|362175|\n", + "| 496|362176|\n", + "| 499|362176|\n", + "| 498|362177|\n", + 
"+-----------+------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# repartitioning to 600 partitions, seems to be balanced now. \n", + "df_all = df_all.repartition(600)\n", + "displaypartitions(df_all)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4b98e4e2-0cf7-406d-8b64-b47e5829e40e", + "metadata": {}, + "outputs": [], + "source": [ + "# we will need a year column in this model:\n", + "df_all = df_all.withColumn('year', F.year(df_all.start_timestamp))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7fa0c27a-245a-4d9b-bbb9-785c828a3317", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- ID: string (nullable = true)\n", + " |-- start_timestamp: timestamp (nullable = true)\n", + " |-- end_timestamp: timestamp (nullable = true)\n", + " |-- seconds: integer (nullable = true)\n", + " |-- miles: double (nullable = true)\n", + " |-- pickup_tract: long (nullable = true)\n", + " |-- dropoff_tract: long (nullable = true)\n", + " |-- pickup_area: integer (nullable = true)\n", + " |-- dropoff_area: integer (nullable = true)\n", + " |-- Fare: double (nullable = true)\n", + " |-- Tip: integer (nullable = true)\n", + " |-- total: double (nullable = true)\n", + " |-- pickup_lat: double (nullable = true)\n", + " |-- pickup_lon: double (nullable = true)\n", + " |-- dropoff_lat: double (nullable = true)\n", + " |-- dropoff_lon: string (nullable = true)\n", + " |-- month: integer (nullable = true)\n", + " |-- day_of_month: integer (nullable = true)\n", + " |-- hour: integer (nullable = true)\n", + " |-- day: integer (nullable = true)\n", + " |-- year: integer (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "df_all.printSchema()" + ] + }, + { + "cell_type": "markdown", + "id": "1b8404b4-c67e-4c8f-a699-40c6fef660e5", + "metadata": {}, + "source": [ + "## Next steps" + ] + }, + { + 
"cell_type": "code", + "execution_count": 9, + "id": "41122437-d77b-4fa6-91f8-22126f875a52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "access @ file:///home/conda/feedstock_root/build_artifacts/access_1696558639912/work\n", + "affine @ file:///home/conda/feedstock_root/build_artifacts/affine_1674245120525/work\n", + "aiohttp @ file:///home/conda/feedstock_root/build_artifacts/aiohttp_1696765416168/work\n", + "aiosignal @ file:///home/conda/feedstock_root/build_artifacts/aiosignal_1667935791922/work\n", + "alabaster @ file:///home/conda/feedstock_root/build_artifacts/alabaster_1673645646525/work\n", + "alembic @ file:///home/conda/feedstock_root/build_artifacts/alembic_1698347477885/work\n", + "amply @ file:///home/conda/feedstock_root/build_artifacts/amply_1687675480808/work\n", + "ansiwrap==0.8.4\n", + "anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist\n", + "appdirs @ file:///home/conda/feedstock_root/build_artifacts/appdirs_1603108395799/work\n", + "argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work\n", + "argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1695386548039/work\n", + "arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1696128962909/work\n", + "astroid @ file:///home/conda/feedstock_root/build_artifacts/astroid_1697450283802/work\n", + "asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work\n", + "async-generator==1.10\n", + "async-timeout @ file:///home/conda/feedstock_root/build_artifacts/async-timeout_1691763562544/work\n", + "atomicwrites @ file:///home/conda/feedstock_root/build_artifacts/atomicwrites_1657325823582/work\n", + "attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work\n", + "autopep8 @ file:///home/conda/feedstock_root/build_artifacts/autopep8_1615918605177/work\n", + "Babel @ 
file:///home/conda/feedstock_root/build_artifacts/babel_1698174530262/work\n", + "backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work\n", + "backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work\n", + "bcolz==1.2.1\n", + "beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work\n", + "binaryornot==0.4.4\n", + "black @ file:///home/conda/feedstock_root/build_artifacts/black-recipe_1622561163993/work\n", + "bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1696630167146/work\n", + "blinker @ file:///home/conda/feedstock_root/build_artifacts/blinker_1698890160476/work\n", + "bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1652969564918/work\n", + "branca @ file:///home/conda/feedstock_root/build_artifacts/branca_1699295994965/work\n", + "Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1687884021435/work\n", + "brotlipy @ file:///home/conda/feedstock_root/build_artifacts/brotlipy_1695621656497/work\n", + "cachetools==4.2.4\n", + "certifi==2023.7.22\n", + "certipy==0.1.3\n", + "cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1696001773319/work\n", + "chardet @ file:///home/conda/feedstock_root/build_artifacts/chardet_1649184112677/work\n", + "charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1698833585322/work\n", + "click==7.1.2\n", + "click-plugins==1.1.1\n", + "cligj @ file:///home/conda/feedstock_root/build_artifacts/cligj_1633637764473/work\n", + "cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpickle_1697464713350/work\n", + "colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work\n", + "conda==4.9.2\n", + "conda-package-handling @ file:///home/conda/feedstock_root/build_artifacts/conda-package-handling_1691048088238/work\n", + 
"conda_package_streaming @ file:///home/conda/feedstock_root/build_artifacts/conda-package-streaming_1691009212940/work\n", + "confuse @ file:///home/conda/feedstock_root/build_artifacts/confuse_1680699073356/work\n", + "cookiecutter @ file:///home/conda/feedstock_root/build_artifacts/cookiecutter_1643669229020/work\n", + "coverage @ file:///home/conda/feedstock_root/build_artifacts/coverage_1696281775256/work\n", + "cryptography @ file:///home/conda/feedstock_root/build_artifacts/cryptography-split_1672672380968/work\n", + "cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work\n", + "Cython @ file:///home/conda/feedstock_root/build_artifacts/cython_1695285659207/work\n", + "cytoolz @ file:///home/conda/feedstock_root/build_artifacts/cytoolz_1695545170008/work\n", + "dask @ file:///home/conda/feedstock_root/build_artifacts/dask-core_1607657054678/work\n", + "dataclasses @ file:///home/conda/feedstock_root/build_artifacts/dataclasses_1628958434797/work\n", + "debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534280282/work\n", + "decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work\n", + "defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work\n", + "deprecation @ file:///home/conda/feedstock_root/build_artifacts/deprecation_1589881437857/work\n", + "descartes==1.1.0\n", + "diff-match-patch @ file:///home/conda/feedstock_root/build_artifacts/diff-match-patch_1683670697993/work\n", + "dill @ file:///home/conda/feedstock_root/build_artifacts/dill_1690101045195/work\n", + "distlib @ file:///home/conda/feedstock_root/build_artifacts/distlib_1689598491484/work\n", + "distributed @ file:///home/conda/feedstock_root/build_artifacts/distributed_1611361822694/work\n", + "docutils @ file:///home/conda/feedstock_root/build_artifacts/docutils_1695300443287/work\n", + "entrypoints @ 
file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work\n", + "esda @ file:///home/conda/feedstock_root/build_artifacts/esda_1660931045600/work\n", + "exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work\n", + "executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work\n", + "fastavro @ file:///home/conda/feedstock_root/build_artifacts/fastavro_1652900770404/work\n", + "fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1696171779618/work/dist\n", + "fastparquet @ file:///home/conda/feedstock_root/build_artifacts/fastparquet_1619039245868/work\n", + "filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1698714947081/work\n", + "findspark @ file:///home/conda/feedstock_root/build_artifacts/findspark_1644599740637/work\n", + "Fiona @ file:///home/conda/feedstock_root/build_artifacts/fiona_1653911984590/work\n", + "flake8 @ file:///home/conda/feedstock_root/build_artifacts/flake8_1601874335748/work\n", + "folium @ file:///home/conda/feedstock_root/build_artifacts/folium_1699298670193/work\n", + "frozenlist @ file:///home/conda/feedstock_root/build_artifacts/frozenlist_1695377782835/work\n", + "fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1618579848600/work\n", + "future @ file:///home/conda/feedstock_root/build_artifacts/future_1673596611778/work\n", + "gcsfs @ file:///home/conda/feedstock_root/build_artifacts/gcsfs_1618251324500/work\n", + "GDAL==3.5.0\n", + "geopandas @ file:///home/conda/feedstock_root/build_artifacts/geopandas_1686057576800/work\n", + "giddy @ file:///home/conda/feedstock_root/build_artifacts/giddy_1696344753517/work\n", + "gitdb @ file:///home/conda/feedstock_root/build_artifacts/gitdb_1697791558612/work\n", + "GitPython @ file:///home/conda/feedstock_root/build_artifacts/gitpython_1697650329377/work\n", + "gmpy2 @ 
file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808683138/work\n", + "google-api-core==1.34.0\n", + "google-auth==1.35.0\n", + "google-auth-oauthlib==0.5.3\n", + "google-cloud-bigquery==3.13.0\n", + "google-cloud-bigquery-storage==2.1.0\n", + "google-cloud-bigtable==1.6.1\n", + "google-cloud-container==2.3.1\n", + "google-cloud-core==2.3.3\n", + "google-cloud-datacatalog==3.0.0\n", + "google-cloud-dataproc==2.2.0\n", + "google-cloud-datastore==2.1.6\n", + "google-cloud-language==2.0.0\n", + "google-cloud-logging==2.1.1\n", + "google-cloud-monitoring==2.0.1\n", + "google-cloud-pubsub==2.2.0\n", + "google-cloud-redis==2.0.0\n", + "google-cloud-spanner==2.1.1\n", + "google-cloud-speech==2.0.1\n", + "google-cloud-storage==2.11.0\n", + "google-cloud-texttospeech==2.2.0\n", + "google-cloud-translate==3.0.2\n", + "google-cloud-vision==2.0.0\n", + "google-crc32c==1.5.0\n", + "google-resumable-media==2.6.0\n", + "googleapis-common-protos==1.61.0\n", + "googlemaps==4.10.0\n", + "graphframes==0.6\n", + "greenlet @ file:///home/conda/feedstock_root/build_artifacts/greenlet_1698243377683/work\n", + "grpc-google-iam-v1==0.12.7\n", + "grpcio==1.59.2\n", + "grpcio-status==1.48.2\n", + "htmlmin==0.1.12\n", + "httplib2 @ file:///home/conda/feedstock_root/build_artifacts/httplib2_1617134439639/work\n", + "idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1593328102638/work\n", + "imagecodecs @ file:///home/conda/feedstock_root/build_artifacts/imagecodecs_1662930206934/work\n", + "ImageHash @ file:///home/conda/feedstock_root/build_artifacts/imagehash_1664371213222/work\n", + "imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1696854106455/work\n", + "imagesize @ file:///home/conda/feedstock_root/build_artifacts/imagesize_1656939531508/work\n", + "importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work\n", + "importlib-resources @ 
file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1699364556997/work\n", + "inequality==1.0.0\n", + "inflection @ file:///home/conda/feedstock_root/build_artifacts/inflection_1598089801258/work\n", + "iniconfig @ file:///home/conda/feedstock_root/build_artifacts/iniconfig_1673103042956/work\n", + "intervaltree @ file:///home/conda/feedstock_root/build_artifacts/intervaltree_1683532206518/work\n", + "ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1655241626755/work\n", + "ipyparallel @ file:///home/conda/feedstock_root/build_artifacts/ipyparallel_1607986704956/work\n", + "ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1683289033986/work\n", + "ipython-genutils==0.2.0\n", + "ipython-sql @ file:///home/conda/feedstock_root/build_artifacts/ipython-sql_1602667917966/work\n", + "ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1660942226216/work\n", + "isort @ file:///home/conda/feedstock_root/build_artifacts/isort_1675033873689/work\n", + "jaraco.classes @ file:///home/conda/feedstock_root/build_artifacts/jaraco.classes_1689112411129/work\n", + "jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1635823949331/work\n", + "jeepney @ file:///home/conda/feedstock_root/build_artifacts/jeepney_1649085214306/work\n", + "Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1636510082894/work\n", + "jinja2-time @ file:///home/conda/feedstock_root/build_artifacts/jinja2-time_1646750632133/work\n", + "joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1691577114857/work\n", + "json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work\n", + "jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1698678498820/work\n", + "jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1689701150890/work\n", + "jupyter-client @ 
file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1649327809992/work\n", + "jupyter-contrib-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_core_1657548529421/work\n", + "jupyter-contrib-nbextensions @ file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_nbextensions_1602805456242/work\n", + "jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1652365251650/work\n", + "# Editable install with no version control (jupyter-gcs-contents-manager==0.0.1)\n", + "-e /opt/dataproc/jupyter/jupyter-extensions-67f08e19469494ace1b953c515b09ae960c1a4ec/jupyter-gcs-contents-manager\n", + "jupyter-highlight-selected-word @ file:///home/conda/feedstock_root/build_artifacts/jupyter_highlight_selected_word_1638382841351/work\n", + "jupyter-http-over-ws @ file:///home/conda/feedstock_root/build_artifacts/jupyter_http_over_ws_1597332535364/work\n", + "jupyter-latex-envs @ file:///home/conda/feedstock_root/build_artifacts/jupyter_latex_envs_1614808832269/work\n", + "jupyter-nbextensions-configurator @ file:///home/conda/feedstock_root/build_artifacts/jupyter_nbextensions_configurator_1670793770953/work\n", + "jupyter-server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1647940913071/work\n", + "jupyter-server-mathjax @ file:///home/conda/feedstock_root/build_artifacts/jupyter-server-mathjax_1672324512570/work\n", + "jupyter-telemetry @ file:///home/conda/feedstock_root/build_artifacts/jupyter_telemetry_1605173804246/work\n", + "jupyterhub @ file:///home/conda/feedstock_root/build_artifacts/jupyterhub-feedstock_1614255305026/work\n", + "jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1632809509349/work\n", + "jupyterlab-git @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab-git_1620032639379/work\n", + "jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work\n", + "jupyterlab-widgets @ 
file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1631590465624/work\n", + "jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1671827361623/work\n", + "kaggle==1.5.16\n", + "keyring @ file:///home/conda/feedstock_root/build_artifacts/keyring_1696001522137/work\n", + "kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/kiwisolver_1695379923772/work\n", + "koalas @ file:///home/conda/feedstock_root/build_artifacts/koalas_1605320953654/work\n", + "libcst==1.1.0\n", + "libpysal @ file:///home/conda/feedstock_root/build_artifacts/libpysal_1668782270408/work\n", + "llvmlite==0.36.0\n", + "locket @ file:///home/conda/feedstock_root/build_artifacts/locket_1650660393415/work\n", + "lxml @ file:///home/conda/feedstock_root/build_artifacts/lxml_1649697664536/work\n", + "Mako @ file:///home/conda/feedstock_root/build_artifacts/mako_1699482234420/work\n", + "mamba @ file:///home/conda/feedstock_root/build_artifacts/mamba_1629310321864/work\n", + "mapclassify @ file:///home/conda/feedstock_root/build_artifacts/mapclassify_1673861555770/work\n", + "Markdown @ file:///home/conda/feedstock_root/build_artifacts/markdown_1651821407140/work\n", + "MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1695367437975/work\n", + "matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1632416634429/work\n", + "matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work\n", + "mccabe==0.6.1\n", + "metakernel @ file:///home/conda/feedstock_root/build_artifacts/metakernel_1648594625035/work\n", + "mgwr @ file:///home/conda/feedstock_root/build_artifacts/mgwr_1696605875605/work\n", + "missingno==0.4.2\n", + "mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1635844675081/work\n", + "mock @ file:///home/conda/feedstock_root/build_artifacts/mock_1689092066756/work\n", + "more-itertools @ 
file:///home/conda/feedstock_root/build_artifacts/more-itertools_1691086935839/work\n", + "mpmath @ file:///home/conda/feedstock_root/build_artifacts/mpmath_1678228039184/work\n", + "msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1695464102412/work\n", + "multidict @ file:///home/conda/feedstock_root/build_artifacts/multidict_1696716067907/work\n", + "munch @ file:///home/conda/feedstock_root/build_artifacts/munch_1688318326844/work\n", + "mypy-extensions @ file:///home/conda/feedstock_root/build_artifacts/mypy_extensions_1675543315189/work\n", + "nbclassic @ file:///home/conda/feedstock_root/build_artifacts/nbclassic_1682598306082/work\n", + "nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1646999386773/work\n", + "nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1605401836768/work\n", + "nbdime @ file:///home/conda/feedstock_root/build_artifacts/nbdime_1618448032595/work\n", + "nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1690814868471/work\n", + "nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1697083700168/work\n", + "networkx @ file:///home/conda/feedstock_root/build_artifacts/networkx_1680692919326/work\n", + "nltk @ file:///home/conda/feedstock_root/build_artifacts/nltk_1633093058893/work\n", + "nose==1.3.7\n", + "notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1610575313697/work\n", + "notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work\n", + "numba @ file:///home/conda/feedstock_root/build_artifacts/numba_1623568544775/work\n", + "numexpr @ file:///home/conda/feedstock_root/build_artifacts/numexpr_1658076426113/work\n", + "numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1649281352817/work\n", + "numpydoc @ file:///home/conda/feedstock_root/build_artifacts/numpydoc_1665273484262/work\n", + "oauth2client==4.1.3\n", + "oauthlib @ 
file:///home/conda/feedstock_root/build_artifacts/oauthlib_1666056362788/work\n", + "opendatasets==0.1.22\n", + "packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work\n", + "pamela @ file:///home/conda/feedstock_root/build_artifacts/pamela_1691565434937/work\n", + "pandas==1.2.5\n", + "pandas-profiling @ file:///home/conda/feedstock_root/build_artifacts/pandas-profiling_1613839428900/work\n", + "pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work\n", + "papermill @ file:///home/conda/feedstock_root/build_artifacts/papermill_1604950649566/work\n", + "parso==0.7.0\n", + "partd @ file:///home/conda/feedstock_root/build_artifacts/partd_1695667515973/work\n", + "pathspec @ file:///home/conda/feedstock_root/build_artifacts/pathspec_1690597952537/work\n", + "patsy @ file:///home/conda/feedstock_root/build_artifacts/patsy_1665356157073/work\n", + "pexpect==4.8.0\n", + "phik @ file:///home/conda/feedstock_root/build_artifacts/phik_1697266240235/work\n", + "pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602535658641/work\n", + "Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1666920566244/work\n", + "pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work\n", + "platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1699715570510/work\n", + "pluggy @ file:///home/conda/feedstock_root/build_artifacts/pluggy_1693086607691/work\n", + "pointpats @ file:///home/conda/feedstock_root/build_artifacts/pointpats_1678201881705/work\n", + "pooch @ file:///home/conda/feedstock_root/build_artifacts/pooch_1698245576425/work\n", + "portalocker @ file:///home/conda/feedstock_root/build_artifacts/portalocker_1695662050140/work\n", + "poyo==0.5.0\n", + "prettytable @ file:///home/conda/feedstock_root/build_artifacts/prettytable_1694464263010/work\n", + "prometheus-client @ 
file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1698692549203/work\n", + "prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1699631011458/work\n", + "proto-plus==1.22.3\n", + "protobuf==3.20.3\n", + "psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1695367190297/work\n", + "ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl\n", + "PuLP @ file:///home/conda/feedstock_root/build_artifacts/pulp_1695847465904/work\n", + "pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work\n", + "pure-sasl @ file:///home/conda/feedstock_root/build_artifacts/pure-sasl_1631890804823/work\n", + "py4j==0.10.9\n", + "pyarrow==2.0.0\n", + "pyasn1 @ file:///home/conda/feedstock_root/build_artifacts/pyasn1_1694615621498/work\n", + "pyasn1-modules @ file:///home/conda/feedstock_root/build_artifacts/pyasn1-modules_1695107857548/work\n", + "pycodestyle @ file:///home/conda/feedstock_root/build_artifacts/pycodestyle_1589305246696/work\n", + "pycosat @ file:///home/conda/feedstock_root/build_artifacts/pycosat_1696355775111/work\n", + "pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work\n", + "pycurl==7.45.1\n", + "pydocstyle @ file:///home/conda/feedstock_root/build_artifacts/pydocstyle_1673997487070/work\n", + "pydot @ file:///home/conda/feedstock_root/build_artifacts/pydot_1695469127091/work\n", + "pyflakes==2.2.0\n", + "Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work\n", + "PyHive @ file:///home/conda/feedstock_root/build_artifacts/pyhive_1646707521362/work\n", + "PyJWT @ file:///home/conda/feedstock_root/build_artifacts/pyjwt_1689721553971/work\n", + "pylint @ file:///home/conda/feedstock_root/build_artifacts/pylint_1698005019851/work\n", + "pyls-black @ 
file:///home/conda/feedstock_root/build_artifacts/pyls-black_1595615126037/work\n", + "pyls-spyder @ file:///home/conda/feedstock_root/build_artifacts/pyls-spyder_1613487177406/work\n", + "pyOpenSSL @ file:///home/conda/feedstock_root/build_artifacts/pyopenssl_1685514481738/work\n", + "pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1635267989520/work\n", + "pyproj @ file:///home/conda/feedstock_root/build_artifacts/pyproj_1650803108421/work\n", + "PyQt5==5.12.3\n", + "PyQt5_sip==4.19.18\n", + "PyQtChart==5.12\n", + "PyQtWebEngine==5.12.1\n", + "pysal @ file:///home/conda/feedstock_root/build_artifacts/pysal_1612819487814/work\n", + "PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work\n", + "# Editable install with no version control (pyspark==3.1.3)\n", + "-e /usr/lib/spark/python\n", + "pytest @ file:///home/conda/feedstock_root/build_artifacts/pytest_1698233724984/work\n", + "pytest-cov @ file:///home/conda/feedstock_root/build_artifacts/pytest-cov_1684964868191/work\n", + "python-dateutil==2.8.0\n", + "python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work\n", + "python-jsonrpc-server @ file:///home/conda/feedstock_root/build_artifacts/python-jsonrpc-server_1599827444631/work\n", + "python-language-server @ file:///home/conda/feedstock_root/build_artifacts/python-language-server_1607720213724/work\n", + "python-slugify @ file:///home/conda/feedstock_root/build_artifacts/python-slugify-split_1694282063120/work\n", + "pytoolconfig @ file:///home/conda/feedstock_root/build_artifacts/pytoolconfig_1675124745143/work\n", + "pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work\n", + "pyu2f @ file:///home/conda/feedstock_root/build_artifacts/pyu2f_1604248910016/work\n", + "PyWavelets @ file:///home/conda/feedstock_root/build_artifacts/pywavelets_1649616412805/work\n", + "pyxdg @ 
file:///home/conda/feedstock_root/build_artifacts/pyxdg_1654536799286/work\n", + "PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373436676/work\n", + "pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1698062423217/work\n", + "QDarkStyle @ file:///home/conda/feedstock_root/build_artifacts/qdarkstyle_1617328841504/work\n", + "qstylizer @ file:///home/conda/feedstock_root/build_artifacts/qstylizer_1662244505808/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl\n", + "QtAwesome @ file:///home/conda/feedstock_root/build_artifacts/qtawesome_1678418951316/work\n", + "qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1699244156891/work\n", + "QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1698112029416/work\n", + "quantecon @ file:///home/conda/feedstock_root/build_artifacts/quantecon_1655746571862/work\n", + "rasterio @ file:///home/conda/feedstock_root/build_artifacts/rasterio_1655388667652/work\n", + "rasterstats @ file:///home/conda/feedstock_root/build_artifacts/rasterstats_1685447679213/work\n", + "referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1691337268233/work\n", + "regex @ file:///home/conda/feedstock_root/build_artifacts/regex_1617644422046/work\n", + "requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1608156231189/work\n", + "requests-oauthlib @ file:///home/conda/feedstock_root/build_artifacts/requests-oauthlib_1643557462909/work\n", + "rope @ file:///home/conda/feedstock_root/build_artifacts/rope_1699525256910/work\n", + "rpds-py @ file:///home/conda/feedstock_root/build_artifacts/rpds-py_1699109843138/work\n", + "rsa @ file:///home/conda/feedstock_root/build_artifacts/rsa_1658328885051/work\n", + "Rtree @ file:///home/conda/feedstock_root/build_artifacts/rtree_1637430736605/work\n", + "ruamel-yaml-conda @ file:///home/conda/feedstock_root/build_artifacts/ruamel_yaml_1695546195936/work\n", + "ruamel.yaml @ 
file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml_1699007344708/work\n", + "ruamel.yaml.clib @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml.clib_1695996844669/work\n", + "scikit-image @ file:///home/conda/feedstock_root/build_artifacts/scikit-image_1638363134145/work\n", + "scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1630910537183/work\n", + "scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1619561901336/work\n", + "seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-split_1629095986539/work\n", + "SecretStorage @ file:///home/conda/feedstock_root/build_artifacts/secretstorage_1695551746400/work\n", + "segregation @ file:///home/conda/feedstock_root/build_artifacts/segregation_1696427305843/work\n", + "Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1682601222253/work\n", + "Shapely @ file:///home/conda/feedstock_root/build_artifacts/shapely_1651793098501/work\n", + "simplejson @ file:///home/conda/feedstock_root/build_artifacts/simplejson_1696595967770/work\n", + "six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work\n", + "smmap @ file:///home/conda/feedstock_root/build_artifacts/smmap_1634310307496/work\n", + "sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work\n", + "snowballstemmer @ file:///home/conda/feedstock_root/build_artifacts/snowballstemmer_1637143057757/work\n", + "snuggs==1.4.7\n", + "sortedcontainers @ file:///home/conda/feedstock_root/build_artifacts/sortedcontainers_1621217038088/work\n", + "soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work\n", + "spaghetti @ file:///home/conda/feedstock_root/build_artifacts/spaghetti_1696295637619/work\n", + "spark-nlp==4.4.0\n", + "spark-nlp-display==4.4\n", + "spglm @ file:///home/conda/feedstock_root/build_artifacts/spglm_1698250481025/work\n", + "Sphinx @ 
file:///home/conda/feedstock_root/build_artifacts/sphinx_1690955392406/work\n", + "sphinxcontrib-applehelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-applehelp_1674487779667/work\n", + "sphinxcontrib-devhelp==1.0.2\n", + "sphinxcontrib-htmlhelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-htmlhelp_1675256494457/work\n", + "sphinxcontrib-jsmath @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-jsmath_1691604704163/work\n", + "sphinxcontrib-qthelp==1.0.3\n", + "sphinxcontrib-serializinghtml @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-serializinghtml_1649380998999/work\n", + "spint @ file:///home/conda/feedstock_root/build_artifacts/spint_1696602360359/work\n", + "splot @ file:///home/conda/feedstock_root/build_artifacts/splot_1649898658322/work\n", + "spopt @ file:///home/conda/feedstock_root/build_artifacts/spopt_1655150061954/work\n", + "spreg @ file:///home/conda/feedstock_root/build_artifacts/spreg_1695792092600/work\n", + "spvcm @ file:///home/conda/feedstock_root/build_artifacts/spvcm_1696623913651/work\n", + "spyder @ file:///home/conda/feedstock_root/build_artifacts/spyder_1627140945937/work\n", + "spyder-kernels @ file:///home/conda/feedstock_root/build_artifacts/spyder-kernels_1625331173960/work\n", + "spylon==0.3.0\n", + "spylon-kernel==0.4.1\n", + "SQLAlchemy @ file:///home/conda/feedstock_root/build_artifacts/sqlalchemy_1697018588089/work\n", + "sqlparse @ file:///home/conda/feedstock_root/build_artifacts/sqlparse_1681817562700/work\n", + "stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work\n", + "statsmodels @ file:///home/conda/feedstock_root/build_artifacts/statsmodels_1654787099639/work\n", + "svgwrite==1.4\n", + "sympy @ file:///home/conda/feedstock_root/build_artifacts/sympy_1618015367433/work\n", + "tables @ file:///home/conda/feedstock_root/build_artifacts/pytables_1638208858826/work\n", + "tangled-up-in-unicode @ 
file:///home/conda/feedstock_root/build_artifacts/tangled-up-in-unicode_1632832610704/work\n", + "tblib @ file:///home/conda/feedstock_root/build_artifacts/tblib_1694702375735/work\n", + "tenacity @ file:///home/conda/feedstock_root/build_artifacts/tenacity_1692026804430/work\n", + "terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1699810101464/work\n", + "testpath @ file:///home/conda/feedstock_root/build_artifacts/testpath_1645693042223/work\n", + "text-unidecode @ file:///home/conda/feedstock_root/build_artifacts/text-unidecode_1694707102786/work\n", + "textdistance @ file:///home/conda/feedstock_root/build_artifacts/textdistance_1663527496115/work\n", + "textwrap3==0.9.2\n", + "threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1689261241048/work\n", + "three-merge @ file:///home/conda/feedstock_root/build_artifacts/three-merge_1595515817927/work\n", + "thrift @ file:///home/conda/feedstock_root/build_artifacts/thrift_1695546065194/work/lib/py\n", + "thrift-sasl @ file:///home/conda/feedstock_root/build_artifacts/thrift_sasl_1631824374965/work\n", + "tifffile @ file:///home/conda/feedstock_root/build_artifacts/tifffile_1665588749940/work\n", + "tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work\n", + "tobler @ file:///home/conda/feedstock_root/build_artifacts/tobler_1696384105449/work\n", + "toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work\n", + "tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work\n", + "tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1698950496895/work\n", + "toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work\n", + "tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827257044/work\n", + "tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1691671248568/work\n", + "traitlets @ 
file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work\n", + "typed-ast @ file:///home/conda/feedstock_root/build_artifacts/typed-ast_1695409893559/work\n", + "types-python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/types-python-dateutil_1689882883784/work\n", + "typing-inspect==0.9.0\n", + "typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work\n", + "ujson @ file:///home/conda/feedstock_root/build_artifacts/ujson_1695472604200/work\n", + "uritemplate==3.0.1\n", + "urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1603125704209/work\n", + "virtualenv @ file:///home/conda/feedstock_root/build_artifacts/virtualenv_1643238754089/work\n", + "visions @ file:///home/conda/feedstock_root/build_artifacts/visions_1600915384170/work\n", + "watchdog @ file:///home/conda/feedstock_root/build_artifacts/watchdog_1695395257294/work\n", + "wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1698744702785/work\n", + "webencodings @ file:///home/conda/feedstock_root/build_artifacts/webencodings_1694681268211/work\n", + "websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1696770128353/work\n", + "widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1637174139311/work\n", + "wurlitzer @ file:///home/conda/feedstock_root/build_artifacts/wurlitzer_1669944596833/work\n", + "xyzservices @ file:///home/conda/feedstock_root/build_artifacts/xyzservices_1698325309404/work\n", + "yapf @ file:///home/conda/feedstock_root/build_artifacts/yapf_1690387939953/work\n", + "yarl @ file:///home/conda/feedstock_root/build_artifacts/yarl_1696732512110/work\n", + "zict @ file:///home/conda/feedstock_root/build_artifacts/zict_1681770155528/work\n", + "zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work\n", + "zstandard==0.22.0\n", + "Note: you may need to restart the kernel to use 
updated packages.\n" + ] + } + ], + "source": [ + "# Check packages:\n", + "%pip freeze" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23ca0926-4afa-4a2a-9fa0-2eb77f6dcce6", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "PySpark", + "display_name": "Python 3", "language": "python", - "name": "pyspark" + "name": "python3" }, "language_info": { "codemirror_mode": {