diff --git a/clustering_analysis.ipynb b/clustering_analysis.ipynb new file mode 100644 index 0000000..c1a1741 --- /dev/null +++ b/clustering_analysis.ipynb @@ -0,0 +1,3456 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "90484ea3-113d-4578-a654-1f80d22d49e6", + "metadata": {}, + "source": [ + "# Unsupervised ML\n", + "\n", + "This notebook loads the data and then runs time-series k-means clustering by count on the following:\n", + "\n", + "1. Pickups in Chicago\n", + "2. Pickups in Hyde Park (pre-program)\n", + "3. Pickups in Hyde Park (program)\n", + "\n", + "Here's the Apache documentation I'll be drawing inspiration from:\n", + "\n", + "- https://spark.apache.org/docs/latest/ml-clustering.html\n", + "- https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.clustering.KMeans.html\n", + "\n", + "And here's the three-part article series that helped me out:\n", + "\n", + "- https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-one/\n", + "- https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-two/\n", + "- https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-three/" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "241961fd-69dd-4036-839c-d5ff609e034a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('spark.stage.maxConsecutiveAttempts', '10'),\n", + " ('spark.dynamicAllocation.minExecutors', '1'),\n", + " ('spark.eventLog.enabled', 'true'),\n", + " ('spark.submit.pyFiles',\n", + " 
'/root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,/root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,/root/.ivy2/jars/com.typesafe_config-1.4.2.jar,/root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,/root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,/root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,/root/.ivy2/jars/com.navigamez_greex-1.0.jar,/root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,/root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,/root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,/root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,/root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,/root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,/root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,/root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,/root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,/root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,/root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,/root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,/root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,/root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,/root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,/root/.ivy2/jars/com.go
ogle.api_gax-httpjson-0.105.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,/root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,/root/.ivy2/jars/com.google.api_gax-2.20.1.jar,/root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,/root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,/root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,/root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,/root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,/root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,/root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,/root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,/root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,/root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,/root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,/root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,/root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,/root/.ivy2/jar
s/io.grpc_grpc-netty-shaded-1.51.0.jar,/root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,/root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,/root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,/root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,/root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,/root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,/root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.dataproc.metrics.listener.metrics.collector.hostname',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", + " ('spark.driver.port', '40031'),\n", + " ('spark.dataproc.sql.joinConditionReorder.enabled', 'true'),\n", + " ('spark.sql.autoBroadcastJoinThreshold', '191m'),\n", + " ('spark.kryoserializer.buffer.max', '2000M'),\n", + " ('spark.serializer', 'org.apache.spark.serializer.KryoSerializer'),\n", + " ('spark.dataproc.sql.local.rank.pushdown.enabled', 'true'),\n", + " ('spark.ui.proxyBase', '/proxy/application_1700850934132_0002'),\n", + " ('spark.driver.host',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal'),\n", + " ('spark.driver.maxResultSize', '0'),\n", + " ('spark.yarn.unmanagedAM.enabled', 'true'),\n", + " ('spark.ui.filters',\n", + " 'org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter'),\n", + " ('spark.app.startTime', '1700858434920'),\n", + " ('spark.metrics.namespace',\n", + " 'app_name:${spark.app.name}.app_id:${spark.app.id}'),\n", + " ('spark.executor.memory', '4g'),\n", + " ('spark.dataproc.sql.optimizer.leftsemijoin.conversion.enabled', 'true'),\n", + " ('spark.hadoop.hive.execution.engine', 'mr'),\n", + " ('spark.executorEnv.PYTHONPATH',\n", + " 
'{{PWD}}/pyspark.zip{{PWD}}/py4j-0.10.9-src.zip{{PWD}}/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar{{PWD}}/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar{{PWD}}/com.typesafe_config-1.4.2.jar{{PWD}}/org.rocksdb_rocksdbjni-6.29.5.jar{{PWD}}/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar{{PWD}}/com.github.universal-automata_liblevenshtein-3.0.0.jar{{PWD}}/com.google.cloud_google-cloud-storage-2.16.0.jar{{PWD}}/com.navigamez_greex-1.0.jar{{PWD}}/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar{{PWD}}/it.unimi.dsi_fastutil-7.0.12.jar{{PWD}}/org.projectlombok_lombok-1.16.8.jar{{PWD}}/com.google.guava_guava-31.1-jre.jar{{PWD}}/com.google.guava_failureaccess-1.0.1.jar{{PWD}}/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar{{PWD}}/com.google.errorprone_error_prone_annotations-2.16.jar{{PWD}}/com.google.j2objc_j2objc-annotations-1.3.jar{{PWD}}/com.google.http-client_google-http-client-1.42.3.jar{{PWD}}/io.opencensus_opencensus-contrib-http-util-0.31.1.jar{{PWD}}/com.google.http-client_google-http-client-jackson2-1.42.3.jar{{PWD}}/com.google.http-client_google-http-client-gson-1.42.3.jar{{PWD}}/com.google.api-client_google-api-client-2.1.1.jar{{PWD}}/commons-codec_commons-codec-1.15.jar{{PWD}}/com.google.oauth-client_google-oauth-client-1.34.1.jar{{PWD}}/com.google.http-client_google-http-client-apache-v2-1.42.3.jar{{PWD}}/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar{{PWD}}/com.google.code.gson_gson-2.10.jar{{PWD}}/com.google.cloud_google-cloud-core-2.9.0.jar{{PWD}}/com.google.auto.value_auto-value-annotations-1.10.1.jar{{PWD}}/com.google.cloud_google-cloud-core-http-2.9.0.jar{{PWD}}/com.google.http-client_google-http-client-appengine-1.42.3.jar{{PWD}}/com.google.api_gax-httpjson-0.105.1.jar{{PWD}}/com.google.cloud_google-cloud-core-grpc-2.9.0.jar{{PWD}}/io.grpc_grpc-core-1.51.0.jar{{PWD}}/com.google.api_gax-2.20.1.jar{{PWD}}/com.google.api_gax-grpc-2.20.1.jar{{PWD}}/io.grpc_grpc-alts-1.51.0.jar{{PWD}}/io.grpc_grpc-g
rpclb-1.51.0.jar{{PWD}}/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar{{PWD}}/io.grpc_grpc-protobuf-1.51.0.jar{{PWD}}/com.google.auth_google-auth-library-credentials-1.13.0.jar{{PWD}}/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar{{PWD}}/com.google.api_api-common-2.2.2.jar{{PWD}}/javax.annotation_javax.annotation-api-1.3.2.jar{{PWD}}/io.opencensus_opencensus-api-0.31.1.jar{{PWD}}/io.grpc_grpc-context-1.51.0.jar{{PWD}}/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar{{PWD}}/com.google.protobuf_protobuf-java-3.21.10.jar{{PWD}}/com.google.protobuf_protobuf-java-util-3.21.10.jar{{PWD}}/com.google.api.grpc_proto-google-common-protos-2.11.0.jar{{PWD}}/org.threeten_threetenbp-1.6.4.jar{{PWD}}/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.fasterxml.jackson.core_jackson-core-2.14.1.jar{{PWD}}/com.google.code.findbugs_jsr305-3.0.2.jar{{PWD}}/io.grpc_grpc-api-1.51.0.jar{{PWD}}/io.grpc_grpc-auth-1.51.0.jar{{PWD}}/io.grpc_grpc-stub-1.51.0.jar{{PWD}}/org.checkerframework_checker-qual-3.28.0.jar{{PWD}}/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar{{PWD}}/io.grpc_grpc-protobuf-lite-1.51.0.jar{{PWD}}/com.google.android_annotations-4.1.1.4.jar{{PWD}}/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar{{PWD}}/io.grpc_grpc-netty-shaded-1.51.0.jar{{PWD}}/io.perfmark_perfmark-api-0.26.0.jar{{PWD}}/io.grpc_grpc-googleapis-1.51.0.jar{{PWD}}/io.grpc_grpc-xds-1.51.0.jar{{PWD}}/io.opencensus_opencensus-proto-0.2.0.jar{{PWD}}/io.grpc_grpc-services-1.51.0.jar{{PWD}}/com.google.re2j_re2j-1.6.jar{{PWD}}/dk.brics.automaton_automaton-1.11-8.jar{{PWD}}/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_HOSTS',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", + " ('spark.executor.id', 'driver'),\n", + " 
('spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version', '2'),\n", + " ('spark.dynamicAllocation.maxExecutors', '10000'),\n", + " ('spark.yarn.dist.pyFiles',\n", + " 'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///
root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.cor
e_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.yarn.am.attemptFailuresValidityInterval', '1h'),\n", + " ('spark.app.name', 'Spark Updated Conf'),\n", + " ('spark.sql.catalogImplementation', 'hive'),\n", + " ('spark.driver.appUIAddress',\n", + " 'http://hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal:33679'),\n", + " ('spark.executorEnv.OPENBLAS_NUM_THREADS', '1'),\n", + " ('spark.yarn.secondary.jars',\n", + " 
'com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,com.typesafe_config-1.4.2.jar,org.rocksdb_rocksdbjni-6.29.5.jar,com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,com.github.universal-automata_liblevenshtein-3.0.0.jar,com.google.cloud_google-cloud-storage-2.16.0.jar,com.navigamez_greex-1.0.jar,com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,it.unimi.dsi_fastutil-7.0.12.jar,org.projectlombok_lombok-1.16.8.jar,com.google.guava_guava-31.1-jre.jar,com.google.guava_failureaccess-1.0.1.jar,com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,com.google.errorprone_error_prone_annotations-2.16.jar,com.google.j2objc_j2objc-annotations-1.3.jar,com.google.http-client_google-http-client-1.42.3.jar,io.opencensus_opencensus-contrib-http-util-0.31.1.jar,com.google.http-client_google-http-client-jackson2-1.42.3.jar,com.google.http-client_google-http-client-gson-1.42.3.jar,com.google.api-client_google-api-client-2.1.1.jar,commons-codec_commons-codec-1.15.jar,com.google.oauth-client_google-oauth-client-1.34.1.jar,com.google.http-client_google-http-client-apache-v2-1.42.3.jar,com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,com.google.code.gson_gson-2.10.jar,com.google.cloud_google-cloud-core-2.9.0.jar,com.google.auto.value_auto-value-annotations-1.10.1.jar,com.google.cloud_google-cloud-core-http-2.9.0.jar,com.google.http-client_google-http-client-appengine-1.42.3.jar,com.google.api_gax-httpjson-0.105.1.jar,com.google.cloud_google-cloud-core-grpc-2.9.0.jar,io.grpc_grpc-core-1.51.0.jar,com.google.api_gax-2.20.1.jar,com.google.api_gax-grpc-2.20.1.jar,io.grpc_grpc-alts-1.51.0.jar,io.grpc_grpc-grpclb-1.51.0.jar,org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,io.grpc_grpc-protobuf-1.51.0.jar,com.google.auth_google-auth-library-credentials-1.13.0.jar,com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,com.google.api_api-common-2.2.2.jar,javax.annotation_javax.annotation-api-1.3.2.jar,io.open
census_opencensus-api-0.31.1.jar,io.grpc_grpc-context-1.51.0.jar,com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,com.google.protobuf_protobuf-java-3.21.10.jar,com.google.protobuf_protobuf-java-util-3.21.10.jar,com.google.api.grpc_proto-google-common-protos-2.11.0.jar,org.threeten_threetenbp-1.6.4.jar,com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,com.fasterxml.jackson.core_jackson-core-2.14.1.jar,com.google.code.findbugs_jsr305-3.0.2.jar,io.grpc_grpc-api-1.51.0.jar,io.grpc_grpc-auth-1.51.0.jar,io.grpc_grpc-stub-1.51.0.jar,org.checkerframework_checker-qual-3.28.0.jar,com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,io.grpc_grpc-protobuf-lite-1.51.0.jar,com.google.android_annotations-4.1.1.4.jar,org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,io.grpc_grpc-netty-shaded-1.51.0.jar,io.perfmark_perfmark-api-0.26.0.jar,io.grpc_grpc-googleapis-1.51.0.jar,io.grpc_grpc-xds-1.51.0.jar,io.opencensus_opencensus-proto-0.2.0.jar,io.grpc_grpc-services-1.51.0.jar,com.google.re2j_re2j-1.6.jar,dk.brics.automaton_automaton-1.11-8.jar,org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.history.fs.logDirectory',\n", + " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/6e7bc176-17b3-42b1-8449-77c1f54798f9/spark-job-history'),\n", + " ('spark.repl.local.jars',\n", + " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.sql.cbo.enabled', 'true'),\n", + " ('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES',\n", + " 'http://hub-msca-bdp-dphub-students-rohitk-m:8088/proxy/application_1700850934132_0002'),\n", + " ('spark.yarn.dist.jars',\n", + " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", + " ('spark.dataproc.sql.parquet.enableFooterCache', 'true'),\n", + " ('spark.driver.memory', '4g'),\n", + " ('spark.sql.warehouse.dir', 'file:/spark-warehouse'),\n", + " ('spark.app.id', 'application_1700850934132_0002'),\n", + " ('spark.yarn.executor.failuresValidityInterval', '1h'),\n", + " ('spark.yarn.am.memory', '640m'),\n", + " ('spark.yarn.historyServer.address',\n", + " 'hub-msca-bdp-dphub-students-rohitk-m:18080'),\n", + " ('spark.cores.max', '4'),\n", + " ('spark.executor.cores', '4'),\n", + " ('spark.jars.packages',\n", + " 'com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.0,graphframes:graphframes:0.8.2-spark3.1-s_2.12'),\n", + " ('spark.executor.instances', '2'),\n", + " ('spark.dataproc.listeners',\n", + " 'com.google.cloud.spark.performance.DataprocMetricsListener'),\n", + " ('spark.serializer.objectStreamReset', '100'),\n", + " ('spark.eventLog.dir',\n", + " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/6e7bc176-17b3-42b1-8449-77c1f54798f9/spark-job-history'),\n", + " ('spark.submit.deployMode', 
'client'),\n", + " ('spark.sql.cbo.joinReorder.enabled', 'true'),\n", + " ('spark.shuffle.service.enabled', 'true'),\n", + " ('spark.scheduler.mode', 'FAIR'),\n", + " ('spark.sql.adaptive.enabled', 'true'),\n", + " ('spark.yarn.jars', 'local:/usr/lib/spark/jars/*'),\n", + " ('spark.scheduler.minRegisteredResourcesRatio', '0.0'),\n", + " ('spark.master', 'yarn'),\n", + " ('spark.ui.port', '0'),\n", + " ('spark.rpc.message.maxSize', '512'),\n", + " ('spark.rdd.compress', 'True'),\n", + " ('spark.task.maxFailures', '10'),\n", + " ('spark.yarn.isPython', 'true'),\n", + " ('spark.dynamicAllocation.enabled', 'true'),\n", + " ('spark.ui.showConsoleProgress', 'true')]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# read in packages create spark environment\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql import functions as F\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "import geopandas as gpd\n", + "from pyspark.ml.feature import VectorAssembler\n", + "from pyspark.ml.clustering import KMeans\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.ml.feature import StandardScaler\n", + "from pyspark.ml.evaluation import ClusteringEvaluator\n", + "\n", + "spark = SparkSession.builder.appName('unsupervised').getOrCreate()\n", + "\n", + "#change configuration settings on Spark \n", + "conf = spark.sparkContext._conf.setAll([('spark.executor.memory', '4g'), ('spark.app.name', 'Spark Updated Conf'), ('spark.executor.cores', '4'), ('spark.cores.max', '4'), ('spark.driver.memory','4g')])\n", + "\n", + "#print spark configuration settings\n", + "spark.sparkContext.getConf().getAll()" + ] + }, + { + "cell_type": "markdown", + "id": "6e8338f7-2c56-4e05-b6b3-78577485dae4", + "metadata": {}, + "source": [ + "### Reading in cleaned data, partitioning" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b98781e4-a2cd-4da2-aa73-70de31956265", + "metadata": 
{}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "| ID| start_timestamp| end_timestamp|seconds|miles|pickup_tract|dropoff_tract|pickup_area|dropoff_area|Fare|Tip|total| pickup_lat| pickup_lon| dropoff_lat| dropoff_lon|month|day_of_month|hour|day|\n", + "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "|625e77ae6e0ff7191...|2018-11-06 19:00:00|2018-11-06 19:15:00| 1142| 5.8| 17031063400| 17031010400| 6| 1|12.5| 0| 15.0|41.9346591566|-87.6467297286| 42.004764559| -87.659122427| 11| 6| 19| 3|\n", + "|62945fdb2e70957f0...|2018-11-06 19:00:00|2018-11-06 19:00:00| 341| 1.2| 17031081800| 17031833000| 8| 28| 5.0| 0| 7.5|41.8932163595|-87.6378442095|41.8852813201|-87.6572331997| 11| 6| 19| 3|\n", + "|6dc03f91e4480d237...|2018-11-06 19:00:00|2018-11-06 19:00:00| 558| 1.2| 17031070400| 17031061500| 7| 6| 7.5| 0| 10.3|41.9289672664|-87.6561568309|41.9452823311|-87.6615450961| 11| 6| 19| 3|\n", + "|773894079a526afa1...|2018-11-06 19:00:00|2018-11-06 19:30:00| 1047| 2.8| 17031832200| 17031062100| 22| 6|10.0| 2| 14.5|41.9204515116|-87.6799547678|41.9426918444|-87.6517705068| 11| 6| 19| 3|\n", + "|7acf0a7f2edfbe546...|2018-11-06 19:00:00|2018-11-06 19:00:00| 502| 1.3| 17031839100| 17031081700| 32| 8| 2.5| 0| 5.0|41.8809944707|-87.6327464887|41.8920421365|-87.6318639497| 11| 6| 19| 3|\n", + 
"+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", + "only showing top 5 rows\n", + "\n" + ] + } + ], + "source": [ + "# read in rideshare data for all years, concatenate, create appropriate partitioning\n", + "# we are dropping 2020 because covid will affect the performance of our model\n", + "\n", + "df_2018 = spark.read.csv(\"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2018.csv\", inferSchema=True, header=True)\n", + "df_2019 = spark.read.csv(\"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2019.csv\", inferSchema=True, header=True)\n", + "df_2021 = spark.read.csv(\"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2021.csv\", inferSchema=True, header=True)\n", + "df_2022 = spark.read.csv(\"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2022.csv\", inferSchema=True, header=True)\n", + "df_2023 = spark.read.csv(\"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data/rides_2023.csv\", inferSchema=True, header=True)\n", + "\n", + "# dropping new columns in 2023\n", + "df_2023 = df_2023.drop('Shared Trip Match','Percent Time Chicago','Percent Distance Chicago')\n", + "\n", + "df_all = df_2018.union(df_2019).union(df_2021).union(df_2022).union(df_2023)\n", + "df_all.show(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "978c6a20-72a3-4a7e-a588-4e1ad2386915", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Partitions: 544\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 16:=====================================================>(543 + 1) / 544]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"+-----------+------+\n", + "|partitionId| count|\n", + "+-----------+------+\n", + "| 42|305254|\n", + "| 41|305316|\n", + "| 40|305420|\n", + "| 38|305471|\n", + "| 39|305480|\n", + "| 37|305618|\n", + "| 36|305676|\n", + "| 35|305871|\n", + "| 34|305890|\n", + "| 33|305962|\n", + "| 32|305971|\n", + "| 31|306010|\n", + "| 29|306031|\n", + "| 30|306038|\n", + "| 28|306086|\n", + "| 27|306127|\n", + "| 26|306402|\n", + "| 25|306467|\n", + "| 24|306633|\n", + "| 23|306731|\n", + "| 22|307226|\n", + "| 243|328837|\n", + "| 242|328975|\n", + "| 241|329131|\n", + "| 240|329163|\n", + "| 239|329209|\n", + "| 237|329245|\n", + "| 235|329263|\n", + "| 238|329263|\n", + "| 234|329311|\n", + "| 236|329315|\n", + "| 232|329332|\n", + "| 233|329344|\n", + "| 231|329373|\n", + "| 228|329389|\n", + "| 229|329390|\n", + "| 227|329399|\n", + "| 225|329410|\n", + "| 226|329410|\n", + "| 224|329418|\n", + "| 230|329427|\n", + "| 223|329428|\n", + "| 220|329461|\n", + "| 222|329481|\n", + "| 221|329505|\n", + "| 217|329507|\n", + "| 218|329513|\n", + "| 219|329519|\n", + "| 216|329523|\n", + "| 214|329533|\n", + "| 213|329555|\n", + "| 215|329574|\n", + "| 211|329587|\n", + "| 212|329591|\n", + "| 208|329607|\n", + "| 210|329623|\n", + "| 206|329624|\n", + "| 209|329630|\n", + "| 207|329633|\n", + "| 205|329646|\n", + "| 202|329654|\n", + "| 204|329673|\n", + "| 203|329678|\n", + "| 194|329704|\n", + "| 201|329708|\n", + "| 200|329712|\n", + "| 191|329717|\n", + "| 189|329728|\n", + "| 188|329730|\n", + "| 199|329732|\n", + "| 193|329732|\n", + "| 198|329739|\n", + "| 190|329746|\n", + "| 195|329748|\n", + "| 196|329786|\n", + "| 186|329786|\n", + "| 197|329786|\n", + "| 187|329787|\n", + "| 192|329793|\n", + "| 183|329804|\n", + "| 184|329805|\n", + "| 179|329807|\n", + "| 182|329814|\n", + "| 181|329819|\n", + "| 185|329843|\n", + "| 178|329849|\n", + "| 177|329861|\n", + "| 180|329865|\n", + "| 172|329866|\n", + "| 173|329871|\n", + "| 171|329879|\n", + "| 175|329889|\n", + "| 
174|329892|\n", + "| 176|329900|\n", + "| 170|329937|\n", + "| 168|329956|\n", + "| 162|329961|\n", + "| 167|329962|\n", + "| 164|329965|\n", + "| 169|329974|\n", + "| 165|329980|\n", + "| 163|329985|\n", + "| 159|329993|\n", + "| 166|330000|\n", + "| 158|330005|\n", + "| 161|330014|\n", + "| 157|330021|\n", + "| 160|330027|\n", + "| 156|330059|\n", + "| 154|330066|\n", + "| 147|330070|\n", + "| 153|330083|\n", + "| 155|330084|\n", + "| 150|330084|\n", + "| 146|330089|\n", + "| 152|330090|\n", + "| 149|330099|\n", + "| 151|330101|\n", + "| 148|330103|\n", + "| 144|330116|\n", + "| 145|330129|\n", + "| 140|330133|\n", + "| 143|330135|\n", + "| 141|330139|\n", + "| 142|330148|\n", + "| 139|330159|\n", + "| 135|330178|\n", + "| 137|330181|\n", + "| 132|330186|\n", + "| 138|330189|\n", + "| 136|330205|\n", + "| 129|330208|\n", + "| 133|330219|\n", + "| 125|330220|\n", + "| 128|330227|\n", + "| 134|330230|\n", + "| 127|330236|\n", + "| 130|330243|\n", + "| 124|330245|\n", + "| 117|330249|\n", + "| 131|330252|\n", + "| 121|330255|\n", + "| 122|330258|\n", + "| 126|330268|\n", + "| 123|330270|\n", + "| 118|330275|\n", + "| 113|330282|\n", + "| 115|330282|\n", + "| 120|330296|\n", + "| 112|330297|\n", + "| 119|330306|\n", + "| 114|330308|\n", + "| 104|330338|\n", + "| 116|330339|\n", + "| 109|330355|\n", + "| 105|330363|\n", + "| 111|330366|\n", + "| 110|330372|\n", + "| 108|330383|\n", + "| 107|330385|\n", + "| 106|330402|\n", + "| 96|330413|\n", + "| 102|330415|\n", + "| 100|330418|\n", + "| 98|330418|\n", + "| 101|330419|\n", + "| 94|330425|\n", + "| 97|330426|\n", + "| 103|330430|\n", + "| 99|330430|\n", + "| 95|330436|\n", + "| 90|330456|\n", + "| 92|330470|\n", + "| 88|330471|\n", + "| 91|330474|\n", + "| 93|330477|\n", + "| 87|330494|\n", + "| 86|330501|\n", + "| 89|330512|\n", + "| 84|330524|\n", + "| 82|330550|\n", + "| 85|330570|\n", + "| 80|330570|\n", + "| 81|330579|\n", + "| 83|330585|\n", + "| 78|330622|\n", + "| 79|330625|\n", + "| 76|330625|\n", + "| 
75|330642|\n", + "| 77|330646|\n", + "| 71|330651|\n", + "| 74|330653|\n", + "| 73|330667|\n", + "| 70|330690|\n", + "| 65|330704|\n", + "| 72|330709|\n", + "| 67|330721|\n", + "| 66|330724|\n", + "| 69|330737|\n", + "| 62|330758|\n", + "| 63|330762|\n", + "| 64|330762|\n", + "| 68|330766|\n", + "| 60|330782|\n", + "| 59|330784|\n", + "| 56|330801|\n", + "| 57|330805|\n", + "| 61|330807|\n", + "| 58|330837|\n", + "| 53|330868|\n", + "| 55|330869|\n", + "| 54|330885|\n", + "| 52|330918|\n", + "| 50|330944|\n", + "| 51|330963|\n", + "| 49|331028|\n", + "| 48|331034|\n", + "| 47|331050|\n", + "| 46|331114|\n", + "| 45|331284|\n", + "| 44|331416|\n", + "| 543|364094|\n", + "| 542|364374|\n", + "| 541|364493|\n", + "| 537|364581|\n", + "| 538|364599|\n", + "| 539|364616|\n", + "| 540|364617|\n", + "| 536|364654|\n", + "| 534|364709|\n", + "| 535|364756|\n", + "| 532|364784|\n", + "| 533|364810|\n", + "| 529|364899|\n", + "| 530|364903|\n", + "| 531|364944|\n", + "| 528|364957|\n", + "| 527|364961|\n", + "| 524|364971|\n", + "| 525|364988|\n", + "| 526|365006|\n", + "| 522|365011|\n", + "| 523|365051|\n", + "| 521|365057|\n", + "| 520|365079|\n", + "| 518|365083|\n", + "| 517|365090|\n", + "| 519|365097|\n", + "| 516|365122|\n", + "| 514|365165|\n", + "| 515|365179|\n", + "| 513|365224|\n", + "| 509|365252|\n", + "| 506|365253|\n", + "| 511|365255|\n", + "| 508|365272|\n", + "| 510|365277|\n", + "| 512|365278|\n", + "| 507|365302|\n", + "| 505|365347|\n", + "| 502|365377|\n", + "| 503|365394|\n", + "| 504|365395|\n", + "| 501|365409|\n", + "| 500|365431|\n", + "| 498|365447|\n", + "| 499|365454|\n", + "| 497|365519|\n", + "| 496|365528|\n", + "| 495|365536|\n", + "| 492|365541|\n", + "| 489|365547|\n", + "| 488|365552|\n", + "| 487|365554|\n", + "| 490|365569|\n", + "| 493|365574|\n", + "| 484|365576|\n", + "| 494|365595|\n", + "| 485|365602|\n", + "| 491|365622|\n", + "| 486|365622|\n", + "| 483|365650|\n", + "| 482|365684|\n", + "| 481|365705|\n", + "| 479|365750|\n", 
+ "| 478|365773|\n", + "| 477|365793|\n", + "| 480|365801|\n", + "| 475|365806|\n", + "| 474|365806|\n", + "| 473|365828|\n", + "| 476|365846|\n", + "| 472|365909|\n", + "| 471|365965|\n", + "| 470|365975|\n", + "| 469|366026|\n", + "| 466|366051|\n", + "| 467|366057|\n", + "| 468|366080|\n", + "| 464|366105|\n", + "| 465|366117|\n", + "| 462|366150|\n", + "| 463|366160|\n", + "| 458|366193|\n", + "| 461|366200|\n", + "| 460|366214|\n", + "| 459|366217|\n", + "| 456|366297|\n", + "| 457|366320|\n", + "| 455|366371|\n", + "| 454|366383|\n", + "| 453|366422|\n", + "| 452|366461|\n", + "| 451|366589|\n", + "| 450|366617|\n", + "| 449|366758|\n", + "| 448|366799|\n", + "| 447|366883|\n", + "| 446|366901|\n", + "| 445|366940|\n", + "| 444|367122|\n", + "| 21|380513|\n", + "| 20|380565|\n", + "| 19|380749|\n", + "| 18|381028|\n", + "| 17|381069|\n", + "| 16|381243|\n", + "| 15|381263|\n", + "| 14|381438|\n", + "| 13|381470|\n", + "| 12|381544|\n", + "| 11|381646|\n", + "| 10|381711|\n", + "| 8|381721|\n", + "| 9|381753|\n", + "| 7|381759|\n", + "| 6|381763|\n", + "| 5|381783|\n", + "| 4|381827|\n", + "| 3|381971|\n", + "| 1|382022|\n", + "| 2|382029|\n", + "| 0|382095|\n", + "| 342|420259|\n", + "| 343|420346|\n", + "| 341|420485|\n", + "| 340|420525|\n", + "| 339|420707|\n", + "| 336|421031|\n", + "| 337|421040|\n", + "| 338|421052|\n", + "| 334|421107|\n", + "| 335|421142|\n", + "| 333|421374|\n", + "| 330|421440|\n", + "| 332|421479|\n", + "| 331|421531|\n", + "| 327|421574|\n", + "| 328|421603|\n", + "| 329|421610|\n", + "| 326|421612|\n", + "| 322|421670|\n", + "| 320|421675|\n", + "| 325|421679|\n", + "| 324|421681|\n", + "| 319|421687|\n", + "| 323|421687|\n", + "| 321|421699|\n", + "| 318|421751|\n", + "| 315|421832|\n", + "| 310|421867|\n", + "| 316|421897|\n", + "| 312|421903|\n", + "| 317|421911|\n", + "| 314|421918|\n", + "| 313|421920|\n", + "| 311|421950|\n", + "| 309|421972|\n", + "| 307|421988|\n", + "| 308|422019|\n", + "| 305|422072|\n", + "| 
303|422083|\n", + "| 306|422091|\n", + "| 304|422095|\n", + "| 302|422097|\n", + "| 298|422103|\n", + "| 300|422114|\n", + "| 301|422116|\n", + "| 295|422134|\n", + "| 299|422155|\n", + "| 296|422155|\n", + "| 290|422185|\n", + "| 297|422193|\n", + "| 294|422194|\n", + "| 292|422207|\n", + "| 291|422218|\n", + "| 293|422236|\n", + "| 288|422238|\n", + "| 286|422255|\n", + "| 289|422265|\n", + "| 287|422266|\n", + "| 285|422305|\n", + "| 283|422307|\n", + "| 284|422346|\n", + "| 282|422350|\n", + "| 281|422354|\n", + "| 280|422372|\n", + "| 279|422415|\n", + "| 278|422498|\n", + "| 277|422501|\n", + "| 276|422508|\n", + "| 275|422549|\n", + "| 274|422557|\n", + "| 273|422591|\n", + "| 272|422625|\n", + "| 270|422634|\n", + "| 269|422671|\n", + "| 268|422673|\n", + "| 271|422692|\n", + "| 267|422694|\n", + "| 265|422761|\n", + "| 262|422777|\n", + "| 260|422788|\n", + "| 263|422795|\n", + "| 266|422803|\n", + "| 264|422807|\n", + "| 258|422838|\n", + "| 259|422839|\n", + "| 261|422841|\n", + "| 257|422852|\n", + "| 256|422891|\n", + "| 252|422904|\n", + "| 255|422925|\n", + "| 254|422986|\n", + "| 253|423003|\n", + "| 250|423197|\n", + "| 251|423202|\n", + "| 248|423231|\n", + "| 249|423262|\n", + "| 246|423376|\n", + "| 247|423402|\n", + "| 245|423403|\n", + "| 244|423762|\n", + "| 43|457702|\n", + "| 443|569570|\n", + "| 442|570154|\n", + "| 441|570301|\n", + "| 440|570372|\n", + "| 439|570572|\n", + "| 438|570655|\n", + "| 436|570763|\n", + "| 437|570781|\n", + "| 434|570870|\n", + "| 435|570872|\n", + "| 433|570953|\n", + "| 432|570979|\n", + "| 431|571069|\n", + "| 429|571096|\n", + "| 430|571097|\n", + "| 428|571127|\n", + "| 427|571153|\n", + "| 426|571185|\n", + "| 425|571201|\n", + "| 424|571286|\n", + "| 423|571425|\n", + "| 422|571449|\n", + "| 417|571506|\n", + "| 420|571528|\n", + "| 421|571532|\n", + "| 419|571553|\n", + "| 418|571585|\n", + "| 416|571595|\n", + "| 414|571645|\n", + "| 415|571657|\n", + "| 413|571742|\n", + "| 412|571766|\n", + "| 
411|571796|\n", + "| 409|571842|\n", + "| 410|571847|\n", + "| 407|571874|\n", + "| 408|571913|\n", + "| 406|571925|\n", + "| 405|571966|\n", + "| 404|571983|\n", + "| 402|571993|\n", + "| 403|572020|\n", + "| 401|572123|\n", + "| 397|572181|\n", + "| 400|572182|\n", + "| 399|572183|\n", + "| 398|572189|\n", + "| 396|572212|\n", + "| 395|572244|\n", + "| 393|572249|\n", + "| 394|572276|\n", + "| 392|572302|\n", + "| 391|572344|\n", + "| 390|572361|\n", + "| 389|572382|\n", + "| 388|572394|\n", + "| 387|572428|\n", + "| 386|572438|\n", + "| 385|572493|\n", + "| 383|572545|\n", + "| 384|572565|\n", + "| 382|572569|\n", + "| 381|572600|\n", + "| 380|572604|\n", + "| 379|572634|\n", + "| 378|572647|\n", + "| 376|572742|\n", + "| 375|572742|\n", + "| 377|572755|\n", + "| 374|572798|\n", + "| 372|572800|\n", + "| 373|572816|\n", + "| 371|572868|\n", + "| 370|572895|\n", + "| 369|572907|\n", + "| 368|572924|\n", + "| 367|572957|\n", + "| 366|573022|\n", + "| 364|573102|\n", + "| 365|573104|\n", + "| 362|573132|\n", + "| 363|573145|\n", + "| 361|573173|\n", + "| 360|573187|\n", + "| 358|573262|\n", + "| 359|573270|\n", + "| 357|573334|\n", + "| 356|573372|\n", + "| 355|573425|\n", + "| 354|573556|\n", + "| 353|573584|\n", + "| 352|573658|\n", + "| 351|573676|\n", + "| 350|573781|\n", + "| 349|573977|\n", + "| 347|574013|\n", + "| 348|574040|\n", + "| 346|574185|\n", + "| 345|574318|\n", + "| 344|574727|\n", + "+-----------+------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "#display number of records by partition\n", + "def displaypartitions(df):\n", + " #number of records by partition\n", + " num = df.rdd.getNumPartitions()\n", + " print(\"Partitions:\", num)\n", + " df.withColumn(\"partitionId\", F.spark_partition_id())\\\n", + " .groupBy(\"partitionId\")\\\n", + " .count()\\\n", + " .orderBy(F.asc(\"count\"))\\\n", + " .show(num)\n", + "\n", + "df_all.rdd.getNumPartitions()\n", + 
"displaypartitions(df_all)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8c914559-481c-4dbe-8438-91eeb2795b54", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 19:=====================================================>(543 + 1) / 544]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Partitions: 600\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 25:================================================> (184 + 16) / 200]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------+------+\n", + "|partitionId| count|\n", + "+-----------+------+\n", + "| 25|362151|\n", + "| 33|362151|\n", + "| 32|362151|\n", + "| 38|362151|\n", + "| 39|362152|\n", + "| 29|362152|\n", + "| 598|362152|\n", + "| 15|362152|\n", + "| 37|362152|\n", + "| 597|362152|\n", + "| 17|362152|\n", + "| 16|362152|\n", + "| 24|362152|\n", + "| 26|362152|\n", + "| 40|362152|\n", + "| 8|362153|\n", + "| 13|362153|\n", + "| 41|362153|\n", + "| 11|362153|\n", + "| 34|362153|\n", + "| 35|362153|\n", + "| 31|362153|\n", + "| 18|362153|\n", + "| 14|362153|\n", + "| 27|362153|\n", + "| 30|362153|\n", + "| 20|362153|\n", + "| 23|362153|\n", + "| 63|362154|\n", + "| 22|362154|\n", + "| 64|362154|\n", + "| 10|362154|\n", + "| 48|362154|\n", + "| 19|362154|\n", + "| 599|362154|\n", + "| 5|362154|\n", + "| 21|362154|\n", + "| 9|362154|\n", + "| 525|362154|\n", + "| 3|362154|\n", + "| 42|362154|\n", + "| 28|362154|\n", + "| 462|362154|\n", + "| 596|362154|\n", + "| 36|362154|\n", + "| 12|362154|\n", + "| 65|362155|\n", + "| 524|362155|\n", + "| 539|362155|\n", + "| 526|362155|\n", + "| 43|362155|\n", + "| 47|362155|\n", + "| 4|362155|\n", + "| 530|362155|\n", + "| 538|362155|\n", + "| 52|362155|\n", + "| 46|362155|\n", + "| 49|362155|\n", + "| 6|362155|\n", + "| 44|362155|\n", + "| 2|362155|\n", + "| 0|362155|\n", + "| 463|362155|\n", + "| 
7|362155|\n", + "| 465|362156|\n", + "| 51|362156|\n", + "| 532|362156|\n", + "| 467|362156|\n", + "| 53|362156|\n", + "| 527|362156|\n", + "| 50|362156|\n", + "| 459|362156|\n", + "| 45|362156|\n", + "| 62|362156|\n", + "| 528|362156|\n", + "| 464|362156|\n", + "| 592|362156|\n", + "| 593|362156|\n", + "| 534|362156|\n", + "| 595|362156|\n", + "| 1|362156|\n", + "| 460|362156|\n", + "| 454|362156|\n", + "| 535|362156|\n", + "| 66|362156|\n", + "| 461|362156|\n", + "| 61|362156|\n", + "| 466|362156|\n", + "| 67|362156|\n", + "| 594|362156|\n", + "| 531|362156|\n", + "| 60|362156|\n", + "| 589|362157|\n", + "| 583|362157|\n", + "| 587|362157|\n", + "| 328|362157|\n", + "| 453|362157|\n", + "| 588|362157|\n", + "| 533|362157|\n", + "| 452|362157|\n", + "| 585|362157|\n", + "| 560|362157|\n", + "| 70|362157|\n", + "| 360|362157|\n", + "| 364|362157|\n", + "| 455|362157|\n", + "| 591|362157|\n", + "| 554|362157|\n", + "| 366|362157|\n", + "| 536|362157|\n", + "| 541|362157|\n", + "| 106|362157|\n", + "| 68|362157|\n", + "| 329|362157|\n", + "| 59|362157|\n", + "| 55|362157|\n", + "| 365|362157|\n", + "| 523|362157|\n", + "| 529|362157|\n", + "| 584|362157|\n", + "| 451|362157|\n", + "| 468|362157|\n", + "| 517|362157|\n", + "| 537|362157|\n", + "| 586|362157|\n", + "| 540|362157|\n", + "| 54|362157|\n", + "| 71|362158|\n", + "| 331|362158|\n", + "| 458|362158|\n", + "| 69|362158|\n", + "| 359|362158|\n", + "| 543|362158|\n", + "| 330|362158|\n", + "| 449|362158|\n", + "| 361|362158|\n", + "| 544|362158|\n", + "| 367|362158|\n", + "| 93|362158|\n", + "| 115|362158|\n", + "| 553|362158|\n", + "| 542|362158|\n", + "| 112|362158|\n", + "| 327|362158|\n", + "| 457|362158|\n", + "| 358|362158|\n", + "| 376|362158|\n", + "| 469|362158|\n", + "| 590|362158|\n", + "| 109|362158|\n", + "| 56|362158|\n", + "| 107|362158|\n", + "| 456|362158|\n", + "| 552|362158|\n", + "| 561|362158|\n", + "| 108|362158|\n", + "| 373|362158|\n", + "| 58|362158|\n", + "| 522|362158|\n", + "| 
368|362158|\n", + "| 580|362158|\n", + "| 74|362158|\n", + "| 516|362158|\n", + "| 521|362158|\n", + "| 94|362158|\n", + "| 326|362158|\n", + "| 111|362158|\n", + "| 57|362158|\n", + "| 559|362159|\n", + "| 448|362159|\n", + "| 450|362159|\n", + "| 321|362159|\n", + "| 471|362159|\n", + "| 556|362159|\n", + "| 566|362159|\n", + "| 318|362159|\n", + "| 565|362159|\n", + "| 332|362159|\n", + "| 555|362159|\n", + "| 470|362159|\n", + "| 319|362159|\n", + "| 545|362159|\n", + "| 372|362159|\n", + "| 518|362159|\n", + "| 113|362159|\n", + "| 569|362159|\n", + "| 511|362159|\n", + "| 333|362159|\n", + "| 447|362159|\n", + "| 73|362159|\n", + "| 563|362159|\n", + "| 335|362159|\n", + "| 362|362159|\n", + "| 513|362159|\n", + "| 322|362159|\n", + "| 515|362159|\n", + "| 75|362159|\n", + "| 334|362159|\n", + "| 374|362159|\n", + "| 564|362159|\n", + "| 110|362159|\n", + "| 116|362159|\n", + "| 512|362159|\n", + "| 114|362159|\n", + "| 581|362159|\n", + "| 72|362159|\n", + "| 557|362159|\n", + "| 363|362159|\n", + "| 551|362159|\n", + "| 562|362159|\n", + "| 377|362159|\n", + "| 582|362159|\n", + "| 324|362159|\n", + "| 336|362159|\n", + "| 317|362159|\n", + "| 76|362160|\n", + "| 77|362160|\n", + "| 558|362160|\n", + "| 476|362160|\n", + "| 325|362160|\n", + "| 550|362160|\n", + "| 375|362160|\n", + "| 339|362160|\n", + "| 547|362160|\n", + "| 475|362160|\n", + "| 315|362160|\n", + "| 87|362160|\n", + "| 79|362160|\n", + "| 519|362160|\n", + "| 96|362160|\n", + "| 91|362160|\n", + "| 92|362160|\n", + "| 356|362160|\n", + "| 510|362160|\n", + "| 95|362160|\n", + "| 338|362160|\n", + "| 78|362160|\n", + "| 378|362160|\n", + "| 370|362160|\n", + "| 105|362160|\n", + "| 369|362160|\n", + "| 508|362160|\n", + "| 567|362160|\n", + "| 118|362160|\n", + "| 473|362160|\n", + "| 337|362160|\n", + "| 520|362160|\n", + "| 357|362160|\n", + "| 88|362160|\n", + "| 89|362160|\n", + "| 90|362160|\n", + "| 320|362160|\n", + "| 549|362160|\n", + "| 514|362160|\n", + "| 117|362161|\n", + "| 
102|362161|\n", + "| 101|362161|\n", + "| 119|362161|\n", + "| 371|362161|\n", + "| 120|362161|\n", + "| 100|362161|\n", + "| 579|362161|\n", + "| 578|362161|\n", + "| 509|362161|\n", + "| 477|362161|\n", + "| 492|362161|\n", + "| 323|362161|\n", + "| 478|362161|\n", + "| 505|362161|\n", + "| 506|362161|\n", + "| 503|362161|\n", + "| 474|362161|\n", + "| 572|362161|\n", + "| 546|362161|\n", + "| 570|362161|\n", + "| 446|362161|\n", + "| 97|362161|\n", + "| 576|362161|\n", + "| 472|362161|\n", + "| 490|362161|\n", + "| 379|362161|\n", + "| 316|362161|\n", + "| 571|362161|\n", + "| 548|362161|\n", + "| 104|362161|\n", + "| 577|362161|\n", + "| 99|362161|\n", + "| 502|362162|\n", + "| 86|362162|\n", + "| 340|362162|\n", + "| 479|362162|\n", + "| 346|362162|\n", + "| 121|362162|\n", + "| 352|362162|\n", + "| 353|362162|\n", + "| 83|362162|\n", + "| 313|362162|\n", + "| 499|362162|\n", + "| 208|362162|\n", + "| 493|362162|\n", + "| 507|362162|\n", + "| 81|362162|\n", + "| 80|362162|\n", + "| 491|362162|\n", + "| 98|362162|\n", + "| 504|362162|\n", + "| 355|362162|\n", + "| 341|362162|\n", + "| 381|362162|\n", + "| 103|362162|\n", + "| 351|362162|\n", + "| 573|362162|\n", + "| 568|362162|\n", + "| 445|362162|\n", + "| 380|362162|\n", + "| 144|362162|\n", + "| 486|362162|\n", + "| 497|362162|\n", + "| 350|362162|\n", + "| 386|362163|\n", + "| 383|362163|\n", + "| 205|362163|\n", + "| 84|362163|\n", + "| 495|362163|\n", + "| 82|362163|\n", + "| 305|362163|\n", + "| 214|362163|\n", + "| 221|362163|\n", + "| 145|362163|\n", + "| 382|362163|\n", + "| 207|362163|\n", + "| 122|362163|\n", + "| 498|362163|\n", + "| 206|362163|\n", + "| 309|362163|\n", + "| 342|362163|\n", + "| 220|362163|\n", + "| 127|362163|\n", + "| 85|362163|\n", + "| 314|362163|\n", + "| 501|362163|\n", + "| 300|362163|\n", + "| 308|362163|\n", + "| 500|362163|\n", + "| 312|362163|\n", + "| 394|362163|\n", + "| 146|362163|\n", + "| 489|362163|\n", + "| 228|362163|\n", + "| 304|362163|\n", + "| 
354|362163|\n", + "| 574|362163|\n", + "| 480|362163|\n", + "| 349|362163|\n", + "| 494|362163|\n", + "| 215|362163|\n", + "| 129|362164|\n", + "| 390|362164|\n", + "| 147|362164|\n", + "| 393|362164|\n", + "| 128|362164|\n", + "| 210|362164|\n", + "| 209|362164|\n", + "| 216|362164|\n", + "| 233|362164|\n", + "| 487|362164|\n", + "| 444|362164|\n", + "| 348|362164|\n", + "| 345|362164|\n", + "| 496|362164|\n", + "| 303|362164|\n", + "| 387|362164|\n", + "| 123|362164|\n", + "| 301|362164|\n", + "| 204|362164|\n", + "| 137|362164|\n", + "| 138|362164|\n", + "| 306|362164|\n", + "| 229|362164|\n", + "| 481|362164|\n", + "| 385|362164|\n", + "| 395|362164|\n", + "| 343|362164|\n", + "| 347|362164|\n", + "| 488|362164|\n", + "| 155|362164|\n", + "| 575|362164|\n", + "| 211|362164|\n", + "| 302|362164|\n", + "| 311|362164|\n", + "| 219|362164|\n", + "| 217|362164|\n", + "| 307|362164|\n", + "| 124|362165|\n", + "| 287|362165|\n", + "| 154|362165|\n", + "| 297|362165|\n", + "| 299|362165|\n", + "| 389|362165|\n", + "| 234|362165|\n", + "| 126|362165|\n", + "| 443|362165|\n", + "| 396|362165|\n", + "| 482|362165|\n", + "| 151|362165|\n", + "| 213|362165|\n", + "| 222|362165|\n", + "| 152|362165|\n", + "| 218|362165|\n", + "| 156|362165|\n", + "| 344|362165|\n", + "| 310|362165|\n", + "| 485|362165|\n", + "| 140|362165|\n", + "| 153|362165|\n", + "| 388|362165|\n", + "| 230|362165|\n", + "| 392|362165|\n", + "| 223|362165|\n", + "| 232|362165|\n", + "| 125|362165|\n", + "| 212|362165|\n", + "| 148|362165|\n", + "| 131|362165|\n", + "| 149|362165|\n", + "| 384|362165|\n", + "| 441|362165|\n", + "| 130|362165|\n", + "| 400|362166|\n", + "| 225|362166|\n", + "| 132|362166|\n", + "| 136|362166|\n", + "| 235|362166|\n", + "| 227|362166|\n", + "| 157|362166|\n", + "| 397|362166|\n", + "| 133|362166|\n", + "| 163|362166|\n", + "| 291|362166|\n", + "| 440|362166|\n", + "| 298|362166|\n", + "| 231|362166|\n", + "| 236|362166|\n", + "| 142|362166|\n", + "| 294|362166|\n", + "| 
160|362166|\n", + "| 238|362166|\n", + "| 139|362166|\n", + "| 391|362166|\n", + "| 403|362166|\n", + "| 237|362166|\n", + "| 484|362166|\n", + "| 296|362166|\n", + "| 226|362166|\n", + "| 150|362166|\n", + "| 134|362166|\n", + "| 483|362166|\n", + "| 224|362166|\n", + "| 401|362166|\n", + "| 164|362166|\n", + "| 166|362166|\n", + "| 398|362166|\n", + "| 292|362166|\n", + "| 442|362166|\n", + "| 165|362167|\n", + "| 295|362167|\n", + "| 438|362167|\n", + "| 239|362167|\n", + "| 167|362167|\n", + "| 265|362167|\n", + "| 141|362167|\n", + "| 402|362167|\n", + "| 161|362167|\n", + "| 431|362167|\n", + "| 288|362167|\n", + "| 293|362167|\n", + "| 404|362167|\n", + "| 439|362167|\n", + "| 430|362167|\n", + "| 289|362167|\n", + "| 437|362167|\n", + "| 135|362167|\n", + "| 158|362167|\n", + "| 143|362167|\n", + "| 410|362167|\n", + "| 399|362167|\n", + "| 290|362167|\n", + "| 168|362167|\n", + "| 162|362167|\n", + "| 169|362168|\n", + "| 241|362168|\n", + "| 170|362168|\n", + "| 432|362168|\n", + "| 243|362168|\n", + "| 415|362168|\n", + "| 258|362168|\n", + "| 240|362168|\n", + "| 411|362168|\n", + "| 433|362168|\n", + "| 254|362168|\n", + "| 436|362168|\n", + "| 405|362168|\n", + "| 416|362168|\n", + "| 417|362168|\n", + "| 407|362168|\n", + "| 159|362168|\n", + "| 262|362168|\n", + "| 285|362168|\n", + "| 203|362168|\n", + "| 414|362168|\n", + "| 286|362168|\n", + "| 242|362168|\n", + "| 406|362168|\n", + "| 264|362169|\n", + "| 255|362169|\n", + "| 412|362169|\n", + "| 266|362169|\n", + "| 409|362169|\n", + "| 434|362169|\n", + "| 419|362169|\n", + "| 263|362169|\n", + "| 178|362169|\n", + "| 267|362169|\n", + "| 171|362169|\n", + "| 249|362169|\n", + "| 245|362169|\n", + "| 256|362169|\n", + "| 413|362169|\n", + "| 175|362169|\n", + "| 260|362169|\n", + "| 253|362169|\n", + "| 284|362169|\n", + "| 250|362169|\n", + "| 418|362169|\n", + "| 261|362169|\n", + "| 173|362169|\n", + "| 435|362169|\n", + "| 246|362170|\n", + "| 280|362170|\n", + "| 408|362170|\n", + "| 
179|362170|\n", + "| 202|362170|\n", + "| 259|362170|\n", + "| 281|362170|\n", + "| 172|362170|\n", + "| 252|362170|\n", + "| 422|362170|\n", + "| 244|362170|\n", + "| 429|362170|\n", + "| 247|362170|\n", + "| 278|362170|\n", + "| 248|362170|\n", + "| 199|362170|\n", + "| 282|362170|\n", + "| 420|362170|\n", + "| 174|362170|\n", + "| 176|362171|\n", + "| 283|362171|\n", + "| 182|362171|\n", + "| 269|362171|\n", + "| 279|362171|\n", + "| 423|362171|\n", + "| 251|362171|\n", + "| 177|362171|\n", + "| 197|362171|\n", + "| 180|362171|\n", + "| 257|362171|\n", + "| 198|362171|\n", + "| 428|362171|\n", + "| 427|362171|\n", + "| 268|362171|\n", + "| 200|362171|\n", + "| 425|362171|\n", + "| 276|362171|\n", + "| 274|362171|\n", + "| 270|362171|\n", + "| 275|362171|\n", + "| 271|362171|\n", + "| 201|362171|\n", + "| 421|362171|\n", + "| 277|362172|\n", + "| 181|362172|\n", + "| 424|362172|\n", + "| 426|362172|\n", + "| 183|362172|\n", + "| 272|362172|\n", + "| 184|362172|\n", + "| 186|362172|\n", + "| 273|362173|\n", + "| 190|362173|\n", + "| 185|362173|\n", + "| 192|362174|\n", + "| 187|362174|\n", + "| 191|362174|\n", + "| 193|362175|\n", + "| 194|362175|\n", + "| 189|362175|\n", + "| 196|362175|\n", + "| 188|362175|\n", + "| 195|362175|\n", + "+-----------+------+\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# repartitioning to 600 partitions, seems to be balanced now. 
\n", + "df_all = df_all.repartition(600)\n", + "displaypartitions(df_all)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4b98e4e2-0cf7-406d-8b64-b47e5829e40e", + "metadata": {}, + "outputs": [], + "source": [ + "# we will need a year column in this model:\n", + "df_all = df_all.withColumn('year', F.year(df_all.start_timestamp))" + ] + }, + { + "cell_type": "markdown", + "id": "1b8404b4-c67e-4c8f-a699-40c6fef660e5", + "metadata": {}, + "source": [ + "## Next steps" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "41122437-d77b-4fa6-91f8-22126f875a52", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "access @ file:///home/conda/feedstock_root/build_artifacts/access_1696558639912/work\n", + "affine @ file:///home/conda/feedstock_root/build_artifacts/affine_1674245120525/work\n", + "aiohttp @ file:///home/conda/feedstock_root/build_artifacts/aiohttp_1696765416168/work\n", + "aiosignal @ file:///home/conda/feedstock_root/build_artifacts/aiosignal_1667935791922/work\n", + "alabaster @ file:///home/conda/feedstock_root/build_artifacts/alabaster_1673645646525/work\n", + "alembic @ file:///home/conda/feedstock_root/build_artifacts/alembic_1698347477885/work\n", + "amply @ file:///home/conda/feedstock_root/build_artifacts/amply_1687675480808/work\n", + "ansiwrap==0.8.4\n", + "anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist\n", + "appdirs @ file:///home/conda/feedstock_root/build_artifacts/appdirs_1603108395799/work\n", + "argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work\n", + "argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1695386548039/work\n", + "arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1696128962909/work\n", + "astroid @ file:///home/conda/feedstock_root/build_artifacts/astroid_1697450283802/work\n", + "asttokens @ 
file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work\n", + "async-generator==1.10\n", + "async-timeout @ file:///home/conda/feedstock_root/build_artifacts/async-timeout_1691763562544/work\n", + "atomicwrites @ file:///home/conda/feedstock_root/build_artifacts/atomicwrites_1657325823582/work\n", + "attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work\n", + "autopep8 @ file:///home/conda/feedstock_root/build_artifacts/autopep8_1615918605177/work\n", + "Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1698174530262/work\n", + "backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work\n", + "backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work\n", + "bcolz==1.2.1\n", + "beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work\n", + "binaryornot==0.4.4\n", + "black @ file:///home/conda/feedstock_root/build_artifacts/black-recipe_1622561163993/work\n", + "bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1696630167146/work\n", + "blinker @ file:///home/conda/feedstock_root/build_artifacts/blinker_1698890160476/work\n", + "bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1652969564918/work\n", + "branca @ file:///home/conda/feedstock_root/build_artifacts/branca_1699295994965/work\n", + "Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1687884021435/work\n", + "brotlipy @ file:///home/conda/feedstock_root/build_artifacts/brotlipy_1695621656497/work\n", + "cachetools==4.2.4\n", + "certifi==2023.7.22\n", + "certipy==0.1.3\n", + "cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1696001773319/work\n", + "chardet @ file:///home/conda/feedstock_root/build_artifacts/chardet_1649184112677/work\n", + "charset-normalizer @ 
file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1698833585322/work\n", + "click==7.1.2\n", + "click-plugins==1.1.1\n", + "cligj @ file:///home/conda/feedstock_root/build_artifacts/cligj_1633637764473/work\n", + "cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpickle_1697464713350/work\n", + "colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work\n", + "conda==4.9.2\n", + "conda-package-handling @ file:///home/conda/feedstock_root/build_artifacts/conda-package-handling_1691048088238/work\n", + "conda_package_streaming @ file:///home/conda/feedstock_root/build_artifacts/conda-package-streaming_1691009212940/work\n", + "confuse @ file:///home/conda/feedstock_root/build_artifacts/confuse_1680699073356/work\n", + "cookiecutter @ file:///home/conda/feedstock_root/build_artifacts/cookiecutter_1643669229020/work\n", + "coverage @ file:///home/conda/feedstock_root/build_artifacts/coverage_1696281775256/work\n", + "cryptography @ file:///home/conda/feedstock_root/build_artifacts/cryptography-split_1672672380968/work\n", + "cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work\n", + "Cython @ file:///home/conda/feedstock_root/build_artifacts/cython_1695285659207/work\n", + "cytoolz @ file:///home/conda/feedstock_root/build_artifacts/cytoolz_1695545170008/work\n", + "dask @ file:///home/conda/feedstock_root/build_artifacts/dask-core_1607657054678/work\n", + "dataclasses @ file:///home/conda/feedstock_root/build_artifacts/dataclasses_1628958434797/work\n", + "db-dtypes==1.1.1\n", + "debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534280282/work\n", + "decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work\n", + "defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work\n", + "deprecation @ file:///home/conda/feedstock_root/build_artifacts/deprecation_1589881437857/work\n", + 
"descartes==1.1.0\n", + "diff-match-patch @ file:///home/conda/feedstock_root/build_artifacts/diff-match-patch_1683670697993/work\n", + "dill @ file:///home/conda/feedstock_root/build_artifacts/dill_1690101045195/work\n", + "distlib @ file:///home/conda/feedstock_root/build_artifacts/distlib_1689598491484/work\n", + "distributed @ file:///home/conda/feedstock_root/build_artifacts/distributed_1611361822694/work\n", + "docutils @ file:///home/conda/feedstock_root/build_artifacts/docutils_1695300443287/work\n", + "entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work\n", + "esda @ file:///home/conda/feedstock_root/build_artifacts/esda_1660931045600/work\n", + "exceptiongroup @ file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work\n", + "executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work\n", + "fastavro @ file:///home/conda/feedstock_root/build_artifacts/fastavro_1652900770404/work\n", + "fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1696171779618/work/dist\n", + "fastparquet @ file:///home/conda/feedstock_root/build_artifacts/fastparquet_1619039245868/work\n", + "filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1698714947081/work\n", + "findspark @ file:///home/conda/feedstock_root/build_artifacts/findspark_1644599740637/work\n", + "Fiona @ file:///home/conda/feedstock_root/build_artifacts/fiona_1653911984590/work\n", + "flake8 @ file:///home/conda/feedstock_root/build_artifacts/flake8_1601874335748/work\n", + "folium @ file:///home/conda/feedstock_root/build_artifacts/folium_1699298670193/work\n", + "frozenlist @ file:///home/conda/feedstock_root/build_artifacts/frozenlist_1695377782835/work\n", + "fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1618579848600/work\n", + "future @ file:///home/conda/feedstock_root/build_artifacts/future_1673596611778/work\n", + "gcsfs @ 
file:///home/conda/feedstock_root/build_artifacts/gcsfs_1618251324500/work\n", + "GDAL==3.5.0\n", + "geopandas @ file:///home/conda/feedstock_root/build_artifacts/geopandas_1686057576800/work\n", + "giddy @ file:///home/conda/feedstock_root/build_artifacts/giddy_1696344753517/work\n", + "gitdb @ file:///home/conda/feedstock_root/build_artifacts/gitdb_1697791558612/work\n", + "GitPython @ file:///home/conda/feedstock_root/build_artifacts/gitpython_1697650329377/work\n", + "gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808683138/work\n", + "google-api-core==1.34.0\n", + "google-auth==1.35.0\n", + "google-auth-oauthlib==0.5.3\n", + "google-cloud-bigquery==3.13.0\n", + "google-cloud-bigquery-storage==2.1.0\n", + "google-cloud-bigtable==1.6.1\n", + "google-cloud-container==2.3.1\n", + "google-cloud-core==2.3.3\n", + "google-cloud-datacatalog==3.0.0\n", + "google-cloud-dataproc==2.2.0\n", + "google-cloud-datastore==2.1.6\n", + "google-cloud-language==2.0.0\n", + "google-cloud-logging==2.1.1\n", + "google-cloud-monitoring==2.0.1\n", + "google-cloud-pubsub==2.2.0\n", + "google-cloud-redis==2.0.0\n", + "google-cloud-spanner==2.1.1\n", + "google-cloud-speech==2.0.1\n", + "google-cloud-storage==2.11.0\n", + "google-cloud-texttospeech==2.2.0\n", + "google-cloud-translate==3.0.2\n", + "google-cloud-vision==2.0.0\n", + "google-crc32c==1.5.0\n", + "google-resumable-media==2.6.0\n", + "googleapis-common-protos==1.61.0\n", + "googlemaps==4.10.0\n", + "graphframes==0.6\n", + "greenlet @ file:///home/conda/feedstock_root/build_artifacts/greenlet_1698243377683/work\n", + "grpc-google-iam-v1==0.12.7\n", + "grpcio==1.59.2\n", + "grpcio-status==1.48.2\n", + "htmlmin==0.1.12\n", + "httplib2 @ file:///home/conda/feedstock_root/build_artifacts/httplib2_1617134439639/work\n", + "idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1593328102638/work\n", + "imagecodecs @ file:///home/conda/feedstock_root/build_artifacts/imagecodecs_1662930206934/work\n", + 
"ImageHash @ file:///home/conda/feedstock_root/build_artifacts/imagehash_1664371213222/work\n", + "imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1696854106455/work\n", + "imagesize @ file:///home/conda/feedstock_root/build_artifacts/imagesize_1656939531508/work\n", + "importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work\n", + "importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1699364556997/work\n", + "inequality==1.0.0\n", + "inflection @ file:///home/conda/feedstock_root/build_artifacts/inflection_1598089801258/work\n", + "iniconfig @ file:///home/conda/feedstock_root/build_artifacts/iniconfig_1673103042956/work\n", + "intervaltree @ file:///home/conda/feedstock_root/build_artifacts/intervaltree_1683532206518/work\n", + "ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1655241626755/work\n", + "ipyparallel @ file:///home/conda/feedstock_root/build_artifacts/ipyparallel_1607986704956/work\n", + "ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1683289033986/work\n", + "ipython-genutils==0.2.0\n", + "ipython-sql @ file:///home/conda/feedstock_root/build_artifacts/ipython-sql_1602667917966/work\n", + "ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1660942226216/work\n", + "isort @ file:///home/conda/feedstock_root/build_artifacts/isort_1675033873689/work\n", + "jaraco.classes @ file:///home/conda/feedstock_root/build_artifacts/jaraco.classes_1689112411129/work\n", + "jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1635823949331/work\n", + "jeepney @ file:///home/conda/feedstock_root/build_artifacts/jeepney_1649085214306/work\n", + "Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1636510082894/work\n", + "jinja2-time @ file:///home/conda/feedstock_root/build_artifacts/jinja2-time_1646750632133/work\n", + "joblib @ 
file:///home/conda/feedstock_root/build_artifacts/joblib_1691577114857/work\n", + "json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work\n", + "jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1698678498820/work\n", + "jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1689701150890/work\n", + "jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1649327809992/work\n", + "jupyter-contrib-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_core_1657548529421/work\n", + "jupyter-contrib-nbextensions @ file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_nbextensions_1602805456242/work\n", + "jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1652365251650/work\n", + "# Editable install with no version control (jupyter-gcs-contents-manager==0.0.1)\n", + "-e /opt/dataproc/jupyter/jupyter-extensions-67f08e19469494ace1b953c515b09ae960c1a4ec/jupyter-gcs-contents-manager\n", + "jupyter-highlight-selected-word @ file:///home/conda/feedstock_root/build_artifacts/jupyter_highlight_selected_word_1638382841351/work\n", + "jupyter-http-over-ws @ file:///home/conda/feedstock_root/build_artifacts/jupyter_http_over_ws_1597332535364/work\n", + "jupyter-latex-envs @ file:///home/conda/feedstock_root/build_artifacts/jupyter_latex_envs_1614808832269/work\n", + "jupyter-nbextensions-configurator @ file:///home/conda/feedstock_root/build_artifacts/jupyter_nbextensions_configurator_1670793770953/work\n", + "jupyter-server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1647940913071/work\n", + "jupyter-server-mathjax @ file:///home/conda/feedstock_root/build_artifacts/jupyter-server-mathjax_1672324512570/work\n", + "jupyter-telemetry @ file:///home/conda/feedstock_root/build_artifacts/jupyter_telemetry_1605173804246/work\n", + "jupyterhub @ 
file:///home/conda/feedstock_root/build_artifacts/jupyterhub-feedstock_1614255305026/work\n", + "jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1632809509349/work\n", + "jupyterlab-git @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab-git_1620032639379/work\n", + "jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work\n", + "jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1631590465624/work\n", + "jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1671827361623/work\n", + "kaggle==1.5.16\n", + "keyring @ file:///home/conda/feedstock_root/build_artifacts/keyring_1696001522137/work\n", + "kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/kiwisolver_1695379923772/work\n", + "koalas @ file:///home/conda/feedstock_root/build_artifacts/koalas_1605320953654/work\n", + "libcst==1.1.0\n", + "libpysal @ file:///home/conda/feedstock_root/build_artifacts/libpysal_1668782270408/work\n", + "llvmlite==0.36.0\n", + "locket @ file:///home/conda/feedstock_root/build_artifacts/locket_1650660393415/work\n", + "lxml @ file:///home/conda/feedstock_root/build_artifacts/lxml_1649697664536/work\n", + "Mako @ file:///home/conda/feedstock_root/build_artifacts/mako_1699482234420/work\n", + "mamba @ file:///home/conda/feedstock_root/build_artifacts/mamba_1629310321864/work\n", + "mapclassify @ file:///home/conda/feedstock_root/build_artifacts/mapclassify_1673861555770/work\n", + "Markdown @ file:///home/conda/feedstock_root/build_artifacts/markdown_1651821407140/work\n", + "MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1695367437975/work\n", + "matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1632416634429/work\n", + "matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work\n", + "mccabe==0.6.1\n", + 
"metakernel @ file:///home/conda/feedstock_root/build_artifacts/metakernel_1648594625035/work\n", + "mgwr @ file:///home/conda/feedstock_root/build_artifacts/mgwr_1696605875605/work\n", + "missingno==0.4.2\n", + "mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1635844675081/work\n", + "mock @ file:///home/conda/feedstock_root/build_artifacts/mock_1689092066756/work\n", + "more-itertools @ file:///home/conda/feedstock_root/build_artifacts/more-itertools_1691086935839/work\n", + "mpmath @ file:///home/conda/feedstock_root/build_artifacts/mpmath_1678228039184/work\n", + "msgpack @ file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1695464102412/work\n", + "multidict @ file:///home/conda/feedstock_root/build_artifacts/multidict_1696716067907/work\n", + "munch @ file:///home/conda/feedstock_root/build_artifacts/munch_1688318326844/work\n", + "mypy-extensions @ file:///home/conda/feedstock_root/build_artifacts/mypy_extensions_1675543315189/work\n", + "nbclassic @ file:///home/conda/feedstock_root/build_artifacts/nbclassic_1682598306082/work\n", + "nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1646999386773/work\n", + "nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1605401836768/work\n", + "nbdime @ file:///home/conda/feedstock_root/build_artifacts/nbdime_1618448032595/work\n", + "nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1690814868471/work\n", + "nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1697083700168/work\n", + "networkx @ file:///home/conda/feedstock_root/build_artifacts/networkx_1680692919326/work\n", + "nltk @ file:///home/conda/feedstock_root/build_artifacts/nltk_1633093058893/work\n", + "nose==1.3.7\n", + "notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1610575313697/work\n", + "notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work\n", + "numba @ 
file:///home/conda/feedstock_root/build_artifacts/numba_1623568544775/work\n", + "numexpr @ file:///home/conda/feedstock_root/build_artifacts/numexpr_1658076426113/work\n", + "numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1649281352817/work\n", + "numpydoc @ file:///home/conda/feedstock_root/build_artifacts/numpydoc_1665273484262/work\n", + "oauth2client==4.1.3\n", + "oauthlib @ file:///home/conda/feedstock_root/build_artifacts/oauthlib_1666056362788/work\n", + "opendatasets==0.1.22\n", + "packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work\n", + "pamela @ file:///home/conda/feedstock_root/build_artifacts/pamela_1691565434937/work\n", + "pandas==1.2.5\n", + "pandas-profiling @ file:///home/conda/feedstock_root/build_artifacts/pandas-profiling_1613839428900/work\n", + "pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work\n", + "papermill @ file:///home/conda/feedstock_root/build_artifacts/papermill_1604950649566/work\n", + "parso==0.7.0\n", + "partd @ file:///home/conda/feedstock_root/build_artifacts/partd_1695667515973/work\n", + "pathspec @ file:///home/conda/feedstock_root/build_artifacts/pathspec_1690597952537/work\n", + "patsy @ file:///home/conda/feedstock_root/build_artifacts/patsy_1665356157073/work\n", + "pexpect==4.8.0\n", + "phik @ file:///home/conda/feedstock_root/build_artifacts/phik_1697266240235/work\n", + "pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602535658641/work\n", + "Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1666920566244/work\n", + "pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work\n", + "platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1699715570510/work\n", + "pluggy @ file:///home/conda/feedstock_root/build_artifacts/pluggy_1693086607691/work\n", + "pointpats @ 
file:///home/conda/feedstock_root/build_artifacts/pointpats_1678201881705/work\n", + "pooch @ file:///home/conda/feedstock_root/build_artifacts/pooch_1698245576425/work\n", + "portalocker @ file:///home/conda/feedstock_root/build_artifacts/portalocker_1695662050140/work\n", + "poyo==0.5.0\n", + "prettytable @ file:///home/conda/feedstock_root/build_artifacts/prettytable_1694464263010/work\n", + "prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1698692549203/work\n", + "prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1699631011458/work\n", + "proto-plus==1.22.3\n", + "protobuf==3.20.3\n", + "psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1695367190297/work\n", + "ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl\n", + "PuLP @ file:///home/conda/feedstock_root/build_artifacts/pulp_1695847465904/work\n", + "pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work\n", + "pure-sasl @ file:///home/conda/feedstock_root/build_artifacts/pure-sasl_1631890804823/work\n", + "py4j==0.10.9\n", + "pyarrow==14.0.1\n", + "pyasn1 @ file:///home/conda/feedstock_root/build_artifacts/pyasn1_1694615621498/work\n", + "pyasn1-modules @ file:///home/conda/feedstock_root/build_artifacts/pyasn1-modules_1695107857548/work\n", + "pycodestyle @ file:///home/conda/feedstock_root/build_artifacts/pycodestyle_1589305246696/work\n", + "pycosat @ file:///home/conda/feedstock_root/build_artifacts/pycosat_1696355775111/work\n", + "pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work\n", + "pycurl==7.45.1\n", + "pydocstyle @ file:///home/conda/feedstock_root/build_artifacts/pydocstyle_1673997487070/work\n", + "pydot @ file:///home/conda/feedstock_root/build_artifacts/pydot_1695469127091/work\n", + "pyflakes==2.2.0\n", + "Pygments @ 
file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work\n", + "PyHive @ file:///home/conda/feedstock_root/build_artifacts/pyhive_1646707521362/work\n", + "PyJWT @ file:///home/conda/feedstock_root/build_artifacts/pyjwt_1689721553971/work\n", + "pylint @ file:///home/conda/feedstock_root/build_artifacts/pylint_1698005019851/work\n", + "pyls-black @ file:///home/conda/feedstock_root/build_artifacts/pyls-black_1595615126037/work\n", + "pyls-spyder @ file:///home/conda/feedstock_root/build_artifacts/pyls-spyder_1613487177406/work\n", + "pyOpenSSL @ file:///home/conda/feedstock_root/build_artifacts/pyopenssl_1685514481738/work\n", + "pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1635267989520/work\n", + "pyproj @ file:///home/conda/feedstock_root/build_artifacts/pyproj_1650803108421/work\n", + "PyQt5==5.12.3\n", + "PyQt5_sip==4.19.18\n", + "PyQtChart==5.12\n", + "PyQtWebEngine==5.12.1\n", + "pysal @ file:///home/conda/feedstock_root/build_artifacts/pysal_1612819487814/work\n", + "PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work\n", + "# Editable install with no version control (pyspark==3.1.3)\n", + "-e /usr/lib/spark/python\n", + "pytest @ file:///home/conda/feedstock_root/build_artifacts/pytest_1698233724984/work\n", + "pytest-cov @ file:///home/conda/feedstock_root/build_artifacts/pytest-cov_1684964868191/work\n", + "python-dateutil==2.8.0\n", + "python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work\n", + "python-jsonrpc-server @ file:///home/conda/feedstock_root/build_artifacts/python-jsonrpc-server_1599827444631/work\n", + "python-language-server @ file:///home/conda/feedstock_root/build_artifacts/python-language-server_1607720213724/work\n", + "python-slugify @ file:///home/conda/feedstock_root/build_artifacts/python-slugify-split_1694282063120/work\n", + "pytoolconfig @ 
file:///home/conda/feedstock_root/build_artifacts/pytoolconfig_1675124745143/work\n", + "pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work\n", + "pyu2f @ file:///home/conda/feedstock_root/build_artifacts/pyu2f_1604248910016/work\n", + "PyWavelets @ file:///home/conda/feedstock_root/build_artifacts/pywavelets_1649616412805/work\n", + "pyxdg @ file:///home/conda/feedstock_root/build_artifacts/pyxdg_1654536799286/work\n", + "PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373436676/work\n", + "pyzmq @ file:///home/conda/feedstock_root/build_artifacts/pyzmq_1698062423217/work\n", + "QDarkStyle @ file:///home/conda/feedstock_root/build_artifacts/qdarkstyle_1617328841504/work\n", + "qstylizer @ file:///home/conda/feedstock_root/build_artifacts/qstylizer_1662244505808/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl\n", + "QtAwesome @ file:///home/conda/feedstock_root/build_artifacts/qtawesome_1678418951316/work\n", + "qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1699244156891/work\n", + "QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1698112029416/work\n", + "quantecon @ file:///home/conda/feedstock_root/build_artifacts/quantecon_1655746571862/work\n", + "rasterio @ file:///home/conda/feedstock_root/build_artifacts/rasterio_1655388667652/work\n", + "rasterstats @ file:///home/conda/feedstock_root/build_artifacts/rasterstats_1685447679213/work\n", + "referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1691337268233/work\n", + "regex @ file:///home/conda/feedstock_root/build_artifacts/regex_1617644422046/work\n", + "requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1608156231189/work\n", + "requests-oauthlib @ file:///home/conda/feedstock_root/build_artifacts/requests-oauthlib_1643557462909/work\n", + "rope @ file:///home/conda/feedstock_root/build_artifacts/rope_1699525256910/work\n", + "rpds-py @ 
file:///home/conda/feedstock_root/build_artifacts/rpds-py_1699109843138/work\n", + "rsa @ file:///home/conda/feedstock_root/build_artifacts/rsa_1658328885051/work\n", + "Rtree @ file:///home/conda/feedstock_root/build_artifacts/rtree_1637430736605/work\n", + "ruamel-yaml-conda @ file:///home/conda/feedstock_root/build_artifacts/ruamel_yaml_1695546195936/work\n", + "ruamel.yaml @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml_1699007344708/work\n", + "ruamel.yaml.clib @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml.clib_1695996844669/work\n", + "scikit-image @ file:///home/conda/feedstock_root/build_artifacts/scikit-image_1638363134145/work\n", + "scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1630910537183/work\n", + "scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1619561901336/work\n", + "seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-split_1629095986539/work\n", + "SecretStorage @ file:///home/conda/feedstock_root/build_artifacts/secretstorage_1695551746400/work\n", + "segregation @ file:///home/conda/feedstock_root/build_artifacts/segregation_1696427305843/work\n", + "Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1682601222253/work\n", + "Shapely @ file:///home/conda/feedstock_root/build_artifacts/shapely_1651793098501/work\n", + "simplejson @ file:///home/conda/feedstock_root/build_artifacts/simplejson_1696595967770/work\n", + "six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work\n", + "smmap @ file:///home/conda/feedstock_root/build_artifacts/smmap_1634310307496/work\n", + "sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work\n", + "snowballstemmer @ file:///home/conda/feedstock_root/build_artifacts/snowballstemmer_1637143057757/work\n", + "snuggs==1.4.7\n", + "sortedcontainers @ file:///home/conda/feedstock_root/build_artifacts/sortedcontainers_1621217038088/work\n", + 
"soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work\n", + "spaghetti @ file:///home/conda/feedstock_root/build_artifacts/spaghetti_1696295637619/work\n", + "spark-nlp==4.4.0\n", + "spark-nlp-display==4.4\n", + "spglm @ file:///home/conda/feedstock_root/build_artifacts/spglm_1698250481025/work\n", + "Sphinx @ file:///home/conda/feedstock_root/build_artifacts/sphinx_1690955392406/work\n", + "sphinxcontrib-applehelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-applehelp_1674487779667/work\n", + "sphinxcontrib-devhelp==1.0.2\n", + "sphinxcontrib-htmlhelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-htmlhelp_1675256494457/work\n", + "sphinxcontrib-jsmath @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-jsmath_1691604704163/work\n", + "sphinxcontrib-qthelp==1.0.3\n", + "sphinxcontrib-serializinghtml @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-serializinghtml_1649380998999/work\n", + "spint @ file:///home/conda/feedstock_root/build_artifacts/spint_1696602360359/work\n", + "splot @ file:///home/conda/feedstock_root/build_artifacts/splot_1649898658322/work\n", + "spopt @ file:///home/conda/feedstock_root/build_artifacts/spopt_1655150061954/work\n", + "spreg @ file:///home/conda/feedstock_root/build_artifacts/spreg_1695792092600/work\n", + "spvcm @ file:///home/conda/feedstock_root/build_artifacts/spvcm_1696623913651/work\n", + "spyder @ file:///home/conda/feedstock_root/build_artifacts/spyder_1627140945937/work\n", + "spyder-kernels @ file:///home/conda/feedstock_root/build_artifacts/spyder-kernels_1625331173960/work\n", + "spylon==0.3.0\n", + "spylon-kernel==0.4.1\n", + "SQLAlchemy @ file:///home/conda/feedstock_root/build_artifacts/sqlalchemy_1697018588089/work\n", + "sqlparse @ file:///home/conda/feedstock_root/build_artifacts/sqlparse_1681817562700/work\n", + "stack-data @ 
file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work\n", + "statsmodels @ file:///home/conda/feedstock_root/build_artifacts/statsmodels_1654787099639/work\n", + "svgwrite==1.4\n", + "sympy @ file:///home/conda/feedstock_root/build_artifacts/sympy_1618015367433/work\n", + "tables @ file:///home/conda/feedstock_root/build_artifacts/pytables_1638208858826/work\n", + "tangled-up-in-unicode @ file:///home/conda/feedstock_root/build_artifacts/tangled-up-in-unicode_1632832610704/work\n", + "tblib @ file:///home/conda/feedstock_root/build_artifacts/tblib_1694702375735/work\n", + "tenacity @ file:///home/conda/feedstock_root/build_artifacts/tenacity_1692026804430/work\n", + "terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1699810101464/work\n", + "testpath @ file:///home/conda/feedstock_root/build_artifacts/testpath_1645693042223/work\n", + "text-unidecode @ file:///home/conda/feedstock_root/build_artifacts/text-unidecode_1694707102786/work\n", + "textdistance @ file:///home/conda/feedstock_root/build_artifacts/textdistance_1663527496115/work\n", + "textwrap3==0.9.2\n", + "threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1689261241048/work\n", + "three-merge @ file:///home/conda/feedstock_root/build_artifacts/three-merge_1595515817927/work\n", + "thrift @ file:///home/conda/feedstock_root/build_artifacts/thrift_1695546065194/work/lib/py\n", + "thrift-sasl @ file:///home/conda/feedstock_root/build_artifacts/thrift_sasl_1631824374965/work\n", + "tifffile @ file:///home/conda/feedstock_root/build_artifacts/tifffile_1665588749940/work\n", + "tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work\n", + "tobler @ file:///home/conda/feedstock_root/build_artifacts/tobler_1696384105449/work\n", + "toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work\n", + "tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work\n", 
+ "tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1698950496895/work\n", + "toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work\n", + "tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827257044/work\n", + "tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1691671248568/work\n", + "traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work\n", + "typed-ast @ file:///home/conda/feedstock_root/build_artifacts/typed-ast_1695409893559/work\n", + "types-python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/types-python-dateutil_1689882883784/work\n", + "typing-inspect==0.9.0\n", + "typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work\n", + "ujson @ file:///home/conda/feedstock_root/build_artifacts/ujson_1695472604200/work\n", + "uritemplate==3.0.1\n", + "urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1603125704209/work\n", + "virtualenv @ file:///home/conda/feedstock_root/build_artifacts/virtualenv_1643238754089/work\n", + "visions @ file:///home/conda/feedstock_root/build_artifacts/visions_1600915384170/work\n", + "watchdog @ file:///home/conda/feedstock_root/build_artifacts/watchdog_1695395257294/work\n", + "wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1698744702785/work\n", + "webencodings @ file:///home/conda/feedstock_root/build_artifacts/webencodings_1694681268211/work\n", + "websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1696770128353/work\n", + "widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1637174139311/work\n", + "wurlitzer @ file:///home/conda/feedstock_root/build_artifacts/wurlitzer_1669944596833/work\n", + "xyzservices @ file:///home/conda/feedstock_root/build_artifacts/xyzservices_1698325309404/work\n", + "yapf @ 
file:///home/conda/feedstock_root/build_artifacts/yapf_1690387939953/work\n", + "yarl @ file:///home/conda/feedstock_root/build_artifacts/yarl_1696732512110/work\n", + "zict @ file:///home/conda/feedstock_root/build_artifacts/zict_1681770155528/work\n", + "zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work\n", + "zstandard==0.22.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "# Check packages:\n", + "%pip freeze" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "75f05b85-f116-42c5-a085-b0cb442a3d45", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- ID: string (nullable = true)\n", + " |-- start_timestamp: timestamp (nullable = true)\n", + " |-- end_timestamp: timestamp (nullable = true)\n", + " |-- seconds: integer (nullable = true)\n", + " |-- miles: double (nullable = true)\n", + " |-- pickup_tract: long (nullable = true)\n", + " |-- dropoff_tract: long (nullable = true)\n", + " |-- pickup_area: integer (nullable = true)\n", + " |-- dropoff_area: integer (nullable = true)\n", + " |-- Fare: double (nullable = true)\n", + " |-- Tip: integer (nullable = true)\n", + " |-- total: double (nullable = true)\n", + " |-- pickup_lat: double (nullable = true)\n", + " |-- pickup_lon: double (nullable = true)\n", + " |-- dropoff_lat: double (nullable = true)\n", + " |-- dropoff_lon: string (nullable = true)\n", + " |-- month: integer (nullable = true)\n", + " |-- day_of_month: integer (nullable = true)\n", + " |-- hour: integer (nullable = true)\n", + " |-- day: integer (nullable = true)\n", + " |-- year: integer (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "df_all.printSchema()" + ] + }, + { + "cell_type": "markdown", + "id": "2da49a33-200a-417b-b885-b1dc30632d22", + "metadata": {}, + "source": [ + "## Clustering Analysis:\n", + "### First we're going to run a time series k-means clustering 
on the entire City of Chicago. " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3a1943f1-e3fd-44c6-a125-30268d45b7a6", + "metadata": {}, + "outputs": [], + "source": [ + "# Drop every row that contains a null in any column (the default behaviour of na.drop()).\n", + "df_all = df_all.na.drop()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04f8d859-1872-4496-b211-6f8a72469c2c", + "metadata": {}, + "outputs": [], + "source": [ + "# Clustering by pick_up area. Understanding the most-popular spots in the city to call a rideshare and their locations:\n", + "# NOTE(review): pickup_area is fed to k-means as a plain number; if it is a categorical area ID, the numeric distance between IDs may not be meaningful -- confirm.\n", + "feature_cols = [\"pickup_area\", \"pickup_lat\", \"pickup_lon\"]\n", + "\n", + "# Step 1: Vector Assembly:\n", + "# feature_assembler packs the raw columns into one vector; vector_assembler re-assembles that single vector, unchanged, into the \"features\" column.\n", + "feature_assembler = VectorAssembler(inputCols=feature_cols, outputCol=\"feature_vector\")\n", + "vector_assembler = VectorAssembler(inputCols=[\"feature_vector\"], outputCol=\"features\")\n", + "\n", + "# Step 2: Normalization (scale each feature to unit standard deviation, without centering):\n", + "scaler = StandardScaler(inputCol=\"features\", outputCol=\"scaled_features\", withStd=True, withMean=False)\n", + "\n", + "# Step 3: K-Means Clustering on the scaled vectors (fixed seed so the initialisation is repeatable):\n", + "kmeans = KMeans(k=3, seed=1, featuresCol=\"scaled_features\", predictionCol=\"prediction\")\n", + "\n", + "# Step 4: Model Training:\n", + "pipeline = Pipeline(stages=[feature_assembler, vector_assembler, scaler, kmeans])\n", + "model = pipeline.fit(df_all)\n", + "\n", + "# Step 5: Prediction:\n", + "predictions = model.transform(df_all)\n", + "\n", + "# Evaluate clustering by computing Silhouette score.\n", + "# The evaluator has to read the same column k-means was fitted on. Its default featuresCol is \"features\"\n", + "# (the unscaled vector), which would score the clusters in a different space from the one they were built in.\n", + "evaluator = ClusteringEvaluator(featuresCol=\"scaled_features\", predictionCol=\"prediction\")\n", + "silhouette = evaluator.evaluate(predictions)\n", + "print(\"Silhouette with squared euclidean distance = \" + str(silhouette))" + ] + }, + { +
"cell_type": "code", + "execution_count": 20, + "id": "86b58900-850f-4a83-82be-91ffc6f62be7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cluster Centers: \n", + "[ 1.80161164 869.01481622 -1588.97018395]\n", + "[ 4.18828318 871.29960529 -1593.60879614]\n", + "[ 5.21132153e-01 8.70095495e+02 -1.58910819e+03]\n" + ] + } + ], + "source": [ + "# Show the resulting cluster centers.\n", + "# model.stages[-1] is the fitted k-means stage (the last entry of the pipeline). It was fitted on scaled_features,\n", + "# so the coordinates printed here are in that scaled space, not in raw area / latitude / longitude units.\n", + "centers = model.stages[-1].clusterCenters()\n", + "print(\"Cluster Centers: \")\n", + "for center in centers:\n", + "    print(center)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b7acfa60-e70a-4d9c-a120-a9e71c037148", + "metadata": {}, + "outputs": [], + "source": [ + "# Keep the input columns, both feature vectors and the assigned cluster for inspection (nothing is displayed by this cell).\n", + "chicago_clustering = predictions.select(\"pickup_area\", \"pickup_lat\", \"pickup_lon\", \"features\", \"scaled_features\", \"prediction\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ed1bed2e-98ab-4391-82a9-e98353394258", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 303:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------+-------------+--------------+--------------------+--------------------+----------+\n", + "|pickup_area| pickup_lat| pickup_lon| features| scaled_features|prediction|\n", + "+-----------+-------------+--------------+--------------------+--------------------+----------+\n", + "| 41|41.8016710371|-87.5942656985|[41.0,41.80167103...|[2.26101319928869...| 0|\n", + "| 5|41.9448137543|-87.6907750098|[5.0,41.944813754...|[0.27573331698642...| 2|\n", + "| 6|41.9359889065|-87.6709663837|[6.0,41.935988906...|[0.33087998038371...| 2|\n", + "| 6| 41.936159071|-87.6612652184|[6.0,41.936159071...|[0.33087998038371...| 2|\n", + "| 23|41.9066839592|-87.7103539349|[23.0,41.90668395...|[1.26837325813756...| 2|\n", + "|
41|41.8012268363|-87.5853031602|[41.0,41.80122683...|[2.26101319928869...| 0|\n", + "| 6| 41.942577185|-87.6470785093|[6.0,41.942577185...|[0.33087998038371...| 2|\n", + "| 6| 41.942577185|-87.6470785093|[6.0,41.942577185...|[0.33087998038371...| 2|\n", + "| 7|41.9217781876|-87.6510618838|[7.0,41.921778187...|[0.38602664378099...| 2|\n", + "| 7|41.9217781876|-87.6510618838|[7.0,41.921778187...|[0.38602664378099...| 2|\n", + "| 7|41.9217781876|-87.6510618838|[7.0,41.921778187...|[0.38602664378099...| 2|\n", + "| 3|41.9724370811|-87.6711095263|[3.0,41.972437081...|[0.16543999019185...| 2|\n", + "| 28|41.8773834707|-87.6806541164|[28.0,41.87738347...|[1.54410657512398...| 0|\n", + "| 16|41.9595267906|-87.7020985409|[16.0,41.95952679...|[0.88234661435656...| 2|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "| 32|41.8809944707|-87.6327464887|[32.0,41.88099447...|[1.76469322871312...| 0|\n", + "+-----------+-------------+--------------+--------------------+--------------------+----------+\n", + "only showing top 20 rows\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "# Seeing 20 of the results:\n", + "chicago_clustering.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ebe1200e-f37a-47bd-a78a-a74fafc4145f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 307:> (0 + 0) / 534][Stage 308:> (0 + 0) / 534]\r" + ] + } + ], + "source": [ + "# Shape of the clustered frame as (row count, column count); count() launches Spark stages over the data to produce the row count.\n", + "print((chicago_clustering.count(), len(chicago_clustering.columns)))" + ] + }, + { +
"cell_type": "code", + "execution_count": null, + "id": "1a2f3163-097d-4ca3-8105-d891019a7bad", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception in thread \"YARN application state monitor\" java.lang.OutOfMemoryError: GC overhead limit exceeded\n", + "\tat com.sun.proxy.$Proxy12.getApplicationReport(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationClientProtocolPBClientImpl.getApplicationReport(ApplicationClientProtocolPBClientImpl.java:256)\n", + "\tat sun.reflect.GeneratedMethodAccessor65.invoke(Unknown Source)\n", + "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n", + "\tat java.lang.reflect.Method.invoke(Method.java:498)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)\n", + "\tat com.sun.proxy.$Proxy13.getApplicationReport(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.client.api.impl.YarnClientImpl.getApplicationReport(YarnClientImpl.java:535)\n", + "\tat org.apache.spark.deploy.yarn.Client.getApplicationReport(Client.scala:345)\n", + "\tat org.apache.spark.deploy.yarn.Client.monitorApplication(Client.scala:1066)\n", + "\tat org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend$MonitorThread.run(YarnClientSchedulerBackend.scala:117)\n", + "23/11/24 22:27:46 ERROR com.codahale.metrics.ScheduledReporter: Exception thrown from StatsdReporter#report. 
Exception was suppressed.\n", + "java.lang.OutOfMemoryError: GC overhead limit exceeded\n", + "\tat java.util.Collections$UnmodifiableMap$UnmodifiableEntrySet.iterator(Collections.java:1656)\n", + "\tat scala.collection.convert.Wrappers$JSetWrapper.iterator(Wrappers.scala:163)\n", + "\tat scala.collection.IterableLike.foreach(IterableLike.scala:74)\n", + "\tat scala.collection.IterableLike.foreach$(IterableLike.scala:73)\n", + "\tat scala.collection.AbstractIterable.foreach(Iterable.scala:56)\n", + "\tat org.apache.spark.metrics.sink.StatsdReporter.$anonfun$report$7(StatsdReporter.scala:76)\n", + "\tat org.apache.spark.metrics.sink.StatsdReporter$$Lambda$2237/1648211405.apply$mcV$sp(Unknown Source)\n", + "\tat scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)\n", + "\tat scala.util.Try$.apply(Try.scala:213)\n", + "\tat org.apache.spark.metrics.sink.StatsdReporter.report(StatsdReporter.scala:75)\n", + "\tat com.codahale.metrics.ScheduledReporter.report(ScheduledReporter.java:237)\n", + "\tat com.codahale.metrics.ScheduledReporter.lambda$start$0(ScheduledReporter.java:177)\n", + "\tat com.codahale.metrics.ScheduledReporter$$Lambda$897/1780696444.run(Unknown Source)\n", + "\tat java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n", + "\tat java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)\n", + "\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)\n", + "\tat java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)\n", + "\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)\n", + "\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 WARN org.apache.hadoop.ipc.Client: Exception encountered while connecting to the server \n", + 
"org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.token.SecretManager$InvalidToken): appattempt_1700850934132_0002_000001 not found in AMRMTokenSecretManager.\n", + "\tat org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:374)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:623)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.access$2300(Client.java:414)\n", + "\tat org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:842)\n", + "\tat org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:838)\n", + "\tat java.security.AccessController.doPrivileged(Native Method)\n", + "\tat javax.security.auth.Subject.doAs(Subject.java:422)\n", + "\tat org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:838)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:414)\n", + "\tat org.apache.hadoop.ipc.Client.getConnection(Client.java:1654)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1479)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1432)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:231)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)\n", + "\tat com.sun.proxy.$Proxy20.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:77)\n", + "\tat sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source)\n", + "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n", + "\tat java.lang.reflect.Method.invoke(Method.java:498)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)\n", + "\tat 
org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)\n", + "\tat com.sun.proxy.$Proxy21.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.allocate(AMRMClientImpl.java:325)\n", + "\tat org.apache.spark.deploy.yarn.YarnAllocator.allocateResources(YarnAllocator.scala:401)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster.org$apache$spark$deploy$yarn$ApplicationMaster$$allocationThreadImpl(ApplicationMaster.scala:576)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:646)\n", + "23/11/24 22:27:46 WARN org.apache.spark.HeartbeatReceiver: Removing executor 41 with no recent heartbeats: 606566 ms exceeds timeout 120000 ms\n", + "23/11/24 22:27:46 WARN org.apache.spark.network.server.TransportChannelHandler: Exception in connection from /10.128.0.13:49396\n", + "java.lang.OutOfMemoryError: GC overhead limit exceeded\n", + "23/11/24 22:27:46 WARN org.apache.spark.network.server.TransportChannelHandler: Exception in connection from /10.128.0.13:46910\n", + "java.lang.OutOfMemoryError: GC overhead limit exceeded\n", + "23/11/24 22:27:46 WARN org.apache.spark.HeartbeatReceiver: Removing executor 44 with no recent heartbeats: 606187 ms exceeds timeout 120000 ms\n", + "23/11/24 22:27:46 WARN org.apache.spark.HeartbeatReceiver: Removing executor 43 with no recent heartbeats: 606566 ms exceeds timeout 120000 ms\n", + "23/11/24 22:27:46 WARN org.apache.spark.rpc.netty.NettyRpcEnv: Ignored message: true\n", + "23/11/24 22:27:46 WARN org.apache.spark.deploy.yarn.ApplicationMaster: Reporter thread fails 1 time(s) in a row.\n", + 
"org.apache.hadoop.security.token.SecretManager$InvalidToken: appattempt_1700850934132_0002_000001 not found in AMRMTokenSecretManager.\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n", + "\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n", + "\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.instantiateException(RPCUtil.java:53)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.instantiateIOException(RPCUtil.java:80)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.unwrapAndThrowException(RPCUtil.java:119)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:79)\n", + "\tat sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source)\n", + "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n", + "\tat java.lang.reflect.Method.invoke(Method.java:498)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)\n", + "\tat com.sun.proxy.$Proxy21.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.allocate(AMRMClientImpl.java:325)\n", + "\tat org.apache.spark.deploy.yarn.YarnAllocator.allocateResources(YarnAllocator.scala:401)\n", + "\tat 
org.apache.spark.deploy.yarn.ApplicationMaster.org$apache$spark$deploy$yarn$ApplicationMaster$$allocationThreadImpl(ApplicationMaster.scala:576)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:646)\n", + "Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.token.SecretManager$InvalidToken): appattempt_1700850934132_0002_000001 not found in AMRMTokenSecretManager.\n", + "\tat org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1589)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1535)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1432)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:231)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)\n", + "\tat com.sun.proxy.$Proxy20.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:77)\n", + "\t... 
13 more\n", + "23/11/24 22:27:46 ERROR org.apache.spark.scheduler.cluster.YarnScheduler: Lost executor 43 on hub-msca-bdp-dphub-students-rohitk-w-2.c.msca-bdp-student-ap.internal: Executor heartbeat timed out after 606566 ms\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7062424549637989301,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=6032750637647102155,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5268706045776405251,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5591622169200431532,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5892056795421893606,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7342795057508538874,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=9002533160262250437,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7171009396498015550,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5123703343359460651,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5030036934419348835,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8061176774465780970,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6275514956338655765,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=7828384139997219750,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5234905306283778916,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7789772703993336541,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6800628084432341581,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8213585359316955070,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5131226224466442418,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8617269945376389868,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8058121759705382362,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7416385524504093420,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4642252440098860807,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6509882082312688553,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.13:49396; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=7634120766875275351,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7758302538454388336,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7978209826066971467,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7089452522359168993,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5630724675740277167,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7760660057606289933,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8248972273521563715,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6446068527737886466,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7989704962497205119,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5067183351138927627,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5541095263609571894,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=6969817484473126207,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5550732265229726279,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7190263940346000551,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8911400010236913338,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6527343649626953451,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6836995330930616345,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8530956988707859204,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8003062449364760209,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4732309432786122124,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7337997448924489870,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7322390798941227700,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=6992245375304881473,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8314434094591263209,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4899434880742290427,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6029321391095087720,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8617063757393656819,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5993956207069172087,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6587574803061694452,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5865241218922505797,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4771336953810389767,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7894267170970360490,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7701602773991044291,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=8841052770018393551,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=9045520713432016446,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8438087716647885123,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=5324784523455447518,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7771201177250886556,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7993906302975017186,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8932539557589202258,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing 
connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4953530044374065889,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat 
io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7270169998797811892,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat 
io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8018870935869479309,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat 
io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6637838094204597345,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result 
RpcResponse[requestId=8705192790647397879,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=8840047058856808637,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat 
io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThre" + ] + }, + { + "ename": "Py4JJavaError", + "evalue": "An error occurred while calling o1622.collectToPython.\n: java.lang.OutOfMemoryError: GC overhead limit exceeded\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mPy4JJavaError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[29], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Convert Spark DataFrame to Pandas DataFrame\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m pandas_df \u001b[38;5;241m=\u001b[39m \u001b[43mchicago_clustering\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtoPandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# Plotting chicago_clustering:\u001b[39;00m\n\u001b[1;32m 5\u001b[0m gdf \u001b[38;5;241m=\u001b[39m gpd\u001b[38;5;241m.\u001b[39mGeoDataFrame(pandas_df, 
geometry\u001b[38;5;241m=\u001b[39mgpd\u001b[38;5;241m.\u001b[39mpoints_from_xy(pandas_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpickup_lon\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m), pandas_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpickup_lat\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m)))\n", + "File \u001b[0;32m/usr/lib/spark/python/pyspark/sql/pandas/conversion.py:141\u001b[0m, in \u001b[0;36mPandasConversionMixin.toPandas\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[1;32m 140\u001b[0m \u001b[38;5;66;03m# Below is toPandas without Arrow optimization.\u001b[39;00m\n\u001b[0;32m--> 141\u001b[0m pdf \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame\u001b[38;5;241m.\u001b[39mfrom_records(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m, columns\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns)\n\u001b[1;32m 142\u001b[0m column_counter \u001b[38;5;241m=\u001b[39m Counter(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns)\n\u001b[1;32m 144\u001b[0m dtype \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28;01mNone\u001b[39;00m] \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mschema)\n", + "File \u001b[0;32m/usr/lib/spark/python/pyspark/sql/dataframe.py:677\u001b[0m, in \u001b[0;36mDataFrame.collect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 667\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Returns all the records as a list of :class:`Row`.\u001b[39;00m\n\u001b[1;32m 668\u001b[0m \n\u001b[1;32m 669\u001b[0m \u001b[38;5;124;03m.. 
versionadded:: 1.3.0\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 674\u001b[0m \u001b[38;5;124;03m[Row(age=2, name='Alice'), Row(age=5, name='Bob')]\u001b[39;00m\n\u001b[1;32m 675\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 676\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m SCCallSiteSync(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_sc) \u001b[38;5;28;01mas\u001b[39;00m css:\n\u001b[0;32m--> 677\u001b[0m sock_info \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_jdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollectToPython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 678\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))\n", + "File \u001b[0;32m/opt/conda/miniconda3/lib/python3.8/site-packages/py4j/java_gateway.py:1304\u001b[0m, in \u001b[0;36mJavaMember.__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 1298\u001b[0m command \u001b[38;5;241m=\u001b[39m proto\u001b[38;5;241m.\u001b[39mCALL_COMMAND_NAME \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1299\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcommand_header \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1300\u001b[0m args_command \u001b[38;5;241m+\u001b[39m\\\n\u001b[1;32m 1301\u001b[0m proto\u001b[38;5;241m.\u001b[39mEND_COMMAND_PART\n\u001b[1;32m 1303\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgateway_client\u001b[38;5;241m.\u001b[39msend_command(command)\n\u001b[0;32m-> 1304\u001b[0m return_value \u001b[38;5;241m=\u001b[39m \u001b[43mget_return_value\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1305\u001b[0m \u001b[43m \u001b[49m\u001b[43manswer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgateway_client\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtarget_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1307\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m temp_arg \u001b[38;5;129;01min\u001b[39;00m temp_args:\n\u001b[1;32m 1308\u001b[0m temp_arg\u001b[38;5;241m.\u001b[39m_detach()\n", + "File \u001b[0;32m/usr/lib/spark/python/pyspark/sql/utils.py:111\u001b[0m, in \u001b[0;36mcapture_sql_exception..deco\u001b[0;34m(*a, **kw)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdeco\u001b[39m(\u001b[38;5;241m*\u001b[39ma, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw):\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m py4j\u001b[38;5;241m.\u001b[39mprotocol\u001b[38;5;241m.\u001b[39mPy4JJavaError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 113\u001b[0m converted \u001b[38;5;241m=\u001b[39m convert_exception(e\u001b[38;5;241m.\u001b[39mjava_exception)\n", + "File \u001b[0;32m/opt/conda/miniconda3/lib/python3.8/site-packages/py4j/protocol.py:326\u001b[0m, in \u001b[0;36mget_return_value\u001b[0;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[1;32m 324\u001b[0m value \u001b[38;5;241m=\u001b[39m OUTPUT_CONVERTER[\u001b[38;5;28mtype\u001b[39m](answer[\u001b[38;5;241m2\u001b[39m:], gateway_client)\n\u001b[1;32m 
325\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m answer[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m REFERENCE_TYPE:\n\u001b[0;32m--> 326\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JJavaError(\n\u001b[1;32m 327\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name), value)\n\u001b[1;32m 329\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Py4JError(\n\u001b[1;32m 331\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAn error occurred while calling \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m. 
Trace:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{3}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39m\n\u001b[1;32m 332\u001b[0m \u001b[38;5;28mformat\u001b[39m(target_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m, name, value))\n", + "\u001b[0;31mPy4JJavaError\u001b[0m: An error occurred while calling o1622.collectToPython.\n: java.lang.OutOfMemoryError: GC overhead limit exceeded\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "adEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=6654704714059881652,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=7723646830402821403,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 ERROR org.apache.spark.network.server.TransportRequestHandler: Error sending result RpcResponse[requestId=4935209983121636070,body=NioManagedBuffer[buf=java.nio.HeapByteBuffer[pos=0 lim=81 cap=156]]] to /10.128.0.92:33856; closing connection\n", + "java.nio.channels.ClosedChannelException\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.newClosedChannelException(AbstractChannel.java:957)\n", + "\tat io.netty.channel.AbstractChannel$AbstractUnsafe.write(AbstractChannel.java:865)\n", + "\tat io.netty.channel.DefaultChannelPipeline$HeadContext.write(DefaultChannelPipeline.java:1367)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:717)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext.invokeWriteAndFlush(AbstractChannelHandlerContext.java:764)\n", + "\tat io.netty.channel.AbstractChannelHandlerContext$WriteTask.run(AbstractChannelHandlerContext.java:1071)\n", + "\tat io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:472)\n", + "\tat io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:500)\n", + "\tat io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n", + "\tat io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n", + "\tat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\n", + "\tat java.lang.Thread.run(Thread.java:750)\n", + "23/11/24 22:27:46 WARN org.apache.hadoop.ipc.Client: Exception encountered while connecting to the server \n", + "org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.token.SecretManager$InvalidToken): appattempt_1700850934132_0002_000001 not found 
in AMRMTokenSecretManager.\n", + "\tat org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:374)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:623)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.access$2300(Client.java:414)\n", + "\tat org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:842)\n", + "\tat org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:838)\n", + "\tat java.security.AccessController.doPrivileged(Native Method)\n", + "\tat javax.security.auth.Subject.doAs(Subject.java:422)\n", + "\tat org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:838)\n", + "\tat org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:414)\n", + "\tat org.apache.hadoop.ipc.Client.getConnection(Client.java:1654)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1479)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1432)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:231)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)\n", + "\tat com.sun.proxy.$Proxy20.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:77)\n", + "\tat sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source)\n", + "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n", + "\tat java.lang.reflect.Method.invoke(Method.java:498)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)\n", + 
"\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)\n", + "\tat com.sun.proxy.$Proxy21.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.allocate(AMRMClientImpl.java:325)\n", + "\tat org.apache.spark.deploy.yarn.YarnAllocator.allocateResources(YarnAllocator.scala:401)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster.org$apache$spark$deploy$yarn$ApplicationMaster$$allocationThreadImpl(ApplicationMaster.scala:576)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:646)\n", + "23/11/24 22:27:46 WARN org.apache.spark.deploy.yarn.ApplicationMaster: Reporter thread fails 2 time(s) in a row.\n", + "org.apache.hadoop.security.token.SecretManager$InvalidToken: appattempt_1700850934132_0002_000001 not found in AMRMTokenSecretManager.\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\n", + "\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\n", + "\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\n", + "\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.instantiateException(RPCUtil.java:53)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.instantiateIOException(RPCUtil.java:80)\n", + "\tat org.apache.hadoop.yarn.ipc.RPCUtil.unwrapAndThrowException(RPCUtil.java:119)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:79)\n", + "\tat sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source)\n", + "\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\n", + "\tat 
java.lang.reflect.Method.invoke(Method.java:498)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)\n", + "\tat org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)\n", + "\tat com.sun.proxy.$Proxy21.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl.allocate(AMRMClientImpl.java:325)\n", + "\tat org.apache.spark.deploy.yarn.YarnAllocator.allocateResources(YarnAllocator.scala:401)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster.org$apache$spark$deploy$yarn$ApplicationMaster$$allocationThreadImpl(ApplicationMaster.scala:576)\n", + "\tat org.apache.spark.deploy.yarn.ApplicationMaster$$anon$1.run(ApplicationMaster.scala:646)\n", + "Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.token.SecretManager$InvalidToken): appattempt_1700850934132_0002_000001 not found in AMRMTokenSecretManager.\n", + "\tat org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1589)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1535)\n", + "\tat org.apache.hadoop.ipc.Client.call(Client.java:1432)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:231)\n", + "\tat org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:118)\n", + "\tat com.sun.proxy.$Proxy20.allocate(Unknown Source)\n", + "\tat org.apache.hadoop.yarn.api.impl.pb.client.ApplicationMasterProtocolPBClientImpl.allocate(ApplicationMasterProtocolPBClientImpl.java:77)\n", + "\t... 
13 more\n" + ] + } + ], + "source": [ + "# Convert Spark DataFrame to Pandas DataFrame\n", + "pandas_df = chicago_clustering.toPandas()\n", + "\n", + "# Create GeoDataFrame with simplified geometry\n", + "gdf = gpd.GeoDataFrame(pandas_df, geometry=gpd.points_from_xy(pandas_df['pickup_lon'].astype(float), pandas_df['pickup_lat'].astype(float)))\n", + "gdf['geometry'] = gdf['geometry'].simplify(tolerance=0.001)\n", + "\n", + "# Plot the clusters\n", + "fig, ax = plt.subplots(figsize=(10, 8))\n", + "gdf.plot(ax=ax, column='prediction', legend=True, markersize=50, cmap='viridis', legend_kwds={'label': \"Cluster\"})\n", + "\n", + "# Add labels and title\n", + "plt.xlabel('Longitude')\n", + "plt.ylabel('Latitude')\n", + "plt.title('Clusters of Pickup Areas')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a614bd22-b64e-4ced-827d-a7f5e7e613d0", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/unsupervised_ml.ipynb b/unsupervised_ml.ipynb deleted file mode 100644 index cf93e32..0000000 --- a/unsupervised_ml.ipynb +++ /dev/null @@ -1,1953 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "90484ea3-113d-4578-a654-1f80d22d49e6", - "metadata": {}, - "source": [ - "# Unsupervised ML\n", - "\n", - "This unsupervised component will be a \n", - "\n", - "Here's some Apache documentation of methods that could be useful:\n", - "\n", - "https://spark.apache.org/docs/latest/ml-clustering.html\n", - 
"https://spark.apache.org/docs/latest/api/python/reference/api/pyspark.ml.clustering.KMeans.html\n", - "\n", - "This article is a good start. It has three parts:\n", - "\n", - "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-one/\n", - "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-two/\n", - "https://www.influxdata.com/blog/why-use-k-means-for-time-series-data-part-three/\n", - "\n", - "\n", - "This article is fine, but some methods are extremely advanced, and we don't have the neccessary packages installed:\n", - "\n", - "https://towardsdatascience.com/time-series-clustering-deriving-trends-and-archetypes-from-sequential-data-bb87783312b4" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "241961fd-69dd-4036-839c-d5ff609e034a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[('spark.stage.maxConsecutiveAttempts', '10'),\n", - " ('spark.dynamicAllocation.minExecutors', '1'),\n", - " ('spark.eventLog.enabled', 'true'),\n", - " ('spark.submit.pyFiles',\n", - " 
'/root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,/root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,/root/.ivy2/jars/com.typesafe_config-1.4.2.jar,/root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,/root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,/root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,/root/.ivy2/jars/com.navigamez_greex-1.0.jar,/root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,/root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,/root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,/root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,/root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,/root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,/root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,/root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,/root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,/root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,/root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,/root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,/root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,/root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,/root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,/root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,/root/.ivy2/jars/com.go
ogle.api_gax-httpjson-0.105.1.jar,/root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,/root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,/root/.ivy2/jars/com.google.api_gax-2.20.1.jar,/root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,/root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,/root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,/root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,/root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,/root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,/root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,/root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,/root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,/root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,/root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,/root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,/root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,/root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,/root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,/root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,/root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,/root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,/root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,/root/.ivy2/jar
s/io.grpc_grpc-netty-shaded-1.51.0.jar,/root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,/root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,/root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,/root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,/root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,/root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,/root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,/root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.dataproc.metrics.listener.metrics.collector.hostname',\n", - " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", - " ('spark.dataproc.sql.joinConditionReorder.enabled', 'true'),\n", - " ('spark.kryoserializer.buffer.max', '2000M'),\n", - " ('spark.driver.port', '40739'),\n", - " ('spark.serializer', 'org.apache.spark.serializer.KryoSerializer'),\n", - " ('spark.dataproc.sql.local.rank.pushdown.enabled', 'true'),\n", - " ('spark.driver.host',\n", - " 'hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal'),\n", - " ('spark.driver.maxResultSize', '0'),\n", - " ('spark.yarn.unmanagedAM.enabled', 'true'),\n", - " ('spark.ui.filters',\n", - " 'org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter'),\n", - " ('spark.metrics.namespace',\n", - " 'app_name:${spark.app.name}.app_id:${spark.app.id}'),\n", - " ('spark.executor.memory', '4g'),\n", - " ('spark.dataproc.sql.optimizer.leftsemijoin.conversion.enabled', 'true'),\n", - " ('spark.app.startTime', '1700709314208'),\n", - " ('spark.hadoop.hive.execution.engine', 'mr'),\n", - " ('spark.executorEnv.PYTHONPATH',\n", - " 
'{{PWD}}/pyspark.zip{{PWD}}/py4j-0.10.9-src.zip{{PWD}}/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar{{PWD}}/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar{{PWD}}/com.typesafe_config-1.4.2.jar{{PWD}}/org.rocksdb_rocksdbjni-6.29.5.jar{{PWD}}/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar{{PWD}}/com.github.universal-automata_liblevenshtein-3.0.0.jar{{PWD}}/com.google.cloud_google-cloud-storage-2.16.0.jar{{PWD}}/com.navigamez_greex-1.0.jar{{PWD}}/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar{{PWD}}/it.unimi.dsi_fastutil-7.0.12.jar{{PWD}}/org.projectlombok_lombok-1.16.8.jar{{PWD}}/com.google.guava_guava-31.1-jre.jar{{PWD}}/com.google.guava_failureaccess-1.0.1.jar{{PWD}}/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar{{PWD}}/com.google.errorprone_error_prone_annotations-2.16.jar{{PWD}}/com.google.j2objc_j2objc-annotations-1.3.jar{{PWD}}/com.google.http-client_google-http-client-1.42.3.jar{{PWD}}/io.opencensus_opencensus-contrib-http-util-0.31.1.jar{{PWD}}/com.google.http-client_google-http-client-jackson2-1.42.3.jar{{PWD}}/com.google.http-client_google-http-client-gson-1.42.3.jar{{PWD}}/com.google.api-client_google-api-client-2.1.1.jar{{PWD}}/commons-codec_commons-codec-1.15.jar{{PWD}}/com.google.oauth-client_google-oauth-client-1.34.1.jar{{PWD}}/com.google.http-client_google-http-client-apache-v2-1.42.3.jar{{PWD}}/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar{{PWD}}/com.google.code.gson_gson-2.10.jar{{PWD}}/com.google.cloud_google-cloud-core-2.9.0.jar{{PWD}}/com.google.auto.value_auto-value-annotations-1.10.1.jar{{PWD}}/com.google.cloud_google-cloud-core-http-2.9.0.jar{{PWD}}/com.google.http-client_google-http-client-appengine-1.42.3.jar{{PWD}}/com.google.api_gax-httpjson-0.105.1.jar{{PWD}}/com.google.cloud_google-cloud-core-grpc-2.9.0.jar{{PWD}}/io.grpc_grpc-core-1.51.0.jar{{PWD}}/com.google.api_gax-2.20.1.jar{{PWD}}/com.google.api_gax-grpc-2.20.1.jar{{PWD}}/io.grpc_grpc-alts-1.51.0.jar{{PWD}}/io.grpc_grpc-g
rpclb-1.51.0.jar{{PWD}}/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar{{PWD}}/io.grpc_grpc-protobuf-1.51.0.jar{{PWD}}/com.google.auth_google-auth-library-credentials-1.13.0.jar{{PWD}}/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar{{PWD}}/com.google.api_api-common-2.2.2.jar{{PWD}}/javax.annotation_javax.annotation-api-1.3.2.jar{{PWD}}/io.opencensus_opencensus-api-0.31.1.jar{{PWD}}/io.grpc_grpc-context-1.51.0.jar{{PWD}}/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar{{PWD}}/com.google.protobuf_protobuf-java-3.21.10.jar{{PWD}}/com.google.protobuf_protobuf-java-util-3.21.10.jar{{PWD}}/com.google.api.grpc_proto-google-common-protos-2.11.0.jar{{PWD}}/org.threeten_threetenbp-1.6.4.jar{{PWD}}/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar{{PWD}}/com.fasterxml.jackson.core_jackson-core-2.14.1.jar{{PWD}}/com.google.code.findbugs_jsr305-3.0.2.jar{{PWD}}/io.grpc_grpc-api-1.51.0.jar{{PWD}}/io.grpc_grpc-auth-1.51.0.jar{{PWD}}/io.grpc_grpc-stub-1.51.0.jar{{PWD}}/org.checkerframework_checker-qual-3.28.0.jar{{PWD}}/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar{{PWD}}/io.grpc_grpc-protobuf-lite-1.51.0.jar{{PWD}}/com.google.android_annotations-4.1.1.4.jar{{PWD}}/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar{{PWD}}/io.grpc_grpc-netty-shaded-1.51.0.jar{{PWD}}/io.perfmark_perfmark-api-0.26.0.jar{{PWD}}/io.grpc_grpc-googleapis-1.51.0.jar{{PWD}}/io.grpc_grpc-xds-1.51.0.jar{{PWD}}/io.opencensus_opencensus-proto-0.2.0.jar{{PWD}}/io.grpc_grpc-services-1.51.0.jar{{PWD}}/com.google.re2j_re2j-1.6.jar{{PWD}}/dk.brics.automaton_automaton-1.11-8.jar{{PWD}}/org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_HOSTS',\n", - " 'hub-msca-bdp-dphub-students-rohitk-m'),\n", - " ('spark.executor.id', 'driver'),\n", - " 
('spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES',\n", - " 'http://hub-msca-bdp-dphub-students-rohitk-m:8088/proxy/application_1700703892135_0004'),\n", - " ('spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version', '2'),\n", - " ('spark.dynamicAllocation.maxExecutors', '10000'),\n", - " ('spark.yarn.dist.pyFiles',\n", - " 'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.
34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-value-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.googl
e.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.yarn.am.attemptFailuresValidityInterval', '1h'),\n", - " ('spark.app.name', 'Spark Updated Conf'),\n", - " ('spark.sql.catalogImplementation', 'hive'),\n", - " ('spark.executorEnv.OPENBLAS_NUM_THREADS', '1'),\n", - " ('spark.yarn.secondary.jars',\n", - " 
'com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,com.typesafe_config-1.4.2.jar,org.rocksdb_rocksdbjni-6.29.5.jar,com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,com.github.universal-automata_liblevenshtein-3.0.0.jar,com.google.cloud_google-cloud-storage-2.16.0.jar,com.navigamez_greex-1.0.jar,com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,it.unimi.dsi_fastutil-7.0.12.jar,org.projectlombok_lombok-1.16.8.jar,com.google.guava_guava-31.1-jre.jar,com.google.guava_failureaccess-1.0.1.jar,com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,com.google.errorprone_error_prone_annotations-2.16.jar,com.google.j2objc_j2objc-annotations-1.3.jar,com.google.http-client_google-http-client-1.42.3.jar,io.opencensus_opencensus-contrib-http-util-0.31.1.jar,com.google.http-client_google-http-client-jackson2-1.42.3.jar,com.google.http-client_google-http-client-gson-1.42.3.jar,com.google.api-client_google-api-client-2.1.1.jar,commons-codec_commons-codec-1.15.jar,com.google.oauth-client_google-oauth-client-1.34.1.jar,com.google.http-client_google-http-client-apache-v2-1.42.3.jar,com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,com.google.code.gson_gson-2.10.jar,com.google.cloud_google-cloud-core-2.9.0.jar,com.google.auto.value_auto-value-annotations-1.10.1.jar,com.google.cloud_google-cloud-core-http-2.9.0.jar,com.google.http-client_google-http-client-appengine-1.42.3.jar,com.google.api_gax-httpjson-0.105.1.jar,com.google.cloud_google-cloud-core-grpc-2.9.0.jar,io.grpc_grpc-core-1.51.0.jar,com.google.api_gax-2.20.1.jar,com.google.api_gax-grpc-2.20.1.jar,io.grpc_grpc-alts-1.51.0.jar,io.grpc_grpc-grpclb-1.51.0.jar,org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,io.grpc_grpc-protobuf-1.51.0.jar,com.google.auth_google-auth-library-credentials-1.13.0.jar,com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,com.google.api_api-common-2.2.2.jar,javax.annotation_javax.annotation-api-1.3.2.jar,io.open
census_opencensus-api-0.31.1.jar,io.grpc_grpc-context-1.51.0.jar,com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,com.google.protobuf_protobuf-java-3.21.10.jar,com.google.protobuf_protobuf-java-util-3.21.10.jar,com.google.api.grpc_proto-google-common-protos-2.11.0.jar,org.threeten_threetenbp-1.6.4.jar,com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,com.fasterxml.jackson.core_jackson-core-2.14.1.jar,com.google.code.findbugs_jsr305-3.0.2.jar,io.grpc_grpc-api-1.51.0.jar,io.grpc_grpc-auth-1.51.0.jar,io.grpc_grpc-stub-1.51.0.jar,org.checkerframework_checker-qual-3.28.0.jar,com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,io.grpc_grpc-protobuf-lite-1.51.0.jar,com.google.android_annotations-4.1.1.4.jar,org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,io.grpc_grpc-netty-shaded-1.51.0.jar,io.perfmark_perfmark-api-0.26.0.jar,io.grpc_grpc-googleapis-1.51.0.jar,io.grpc_grpc-xds-1.51.0.jar,io.opencensus_opencensus-proto-0.2.0.jar,io.grpc_grpc-services-1.51.0.jar,com.google.re2j_re2j-1.6.jar,dk.brics.automaton_automaton-1.11-8.jar,org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.history.fs.logDirectory',\n", - " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/73ad5d52-7eed-4ef0-a116-3e9fd02da220/spark-job-history'),\n", - " ('spark.ui.proxyBase', '/proxy/application_1700703892135_0004'),\n", - " ('spark.repl.local.jars',\n", - " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.sql.cbo.enabled', 'true'),\n", - " ('spark.yarn.dist.jars',\n", - " 
'file:///root/.ivy2/jars/com.johnsnowlabs.nlp_spark-nlp_2.12-4.4.0.jar,file:///root/.ivy2/jars/graphframes_graphframes-0.8.2-spark3.1-s_2.12.jar,file:///root/.ivy2/jars/com.typesafe_config-1.4.2.jar,file:///root/.ivy2/jars/org.rocksdb_rocksdbjni-6.29.5.jar,file:///root/.ivy2/jars/com.amazonaws_aws-java-sdk-bundle-1.11.828.jar,file:///root/.ivy2/jars/com.github.universal-automata_liblevenshtein-3.0.0.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-storage-2.16.0.jar,file:///root/.ivy2/jars/com.navigamez_greex-1.0.jar,file:///root/.ivy2/jars/com.johnsnowlabs.nlp_tensorflow-cpu_2.12-0.4.4.jar,file:///root/.ivy2/jars/it.unimi.dsi_fastutil-7.0.12.jar,file:///root/.ivy2/jars/org.projectlombok_lombok-1.16.8.jar,file:///root/.ivy2/jars/com.google.guava_guava-31.1-jre.jar,file:///root/.ivy2/jars/com.google.guava_failureaccess-1.0.1.jar,file:///root/.ivy2/jars/com.google.guava_listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar,file:///root/.ivy2/jars/com.google.errorprone_error_prone_annotations-2.16.jar,file:///root/.ivy2/jars/com.google.j2objc_j2objc-annotations-1.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-1.42.3.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-contrib-http-util-0.31.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-jackson2-1.42.3.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-gson-1.42.3.jar,file:///root/.ivy2/jars/com.google.api-client_google-api-client-2.1.1.jar,file:///root/.ivy2/jars/commons-codec_commons-codec-1.15.jar,file:///root/.ivy2/jars/com.google.oauth-client_google-oauth-client-1.34.1.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-apache-v2-1.42.3.jar,file:///root/.ivy2/jars/com.google.apis_google-api-services-storage-v1-rev20220705-2.0.0.jar,file:///root/.ivy2/jars/com.google.code.gson_gson-2.10.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-2.9.0.jar,file:///root/.ivy2/jars/com.google.auto.value_auto-val
ue-annotations-1.10.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-http-2.9.0.jar,file:///root/.ivy2/jars/com.google.http-client_google-http-client-appengine-1.42.3.jar,file:///root/.ivy2/jars/com.google.api_gax-httpjson-0.105.1.jar,file:///root/.ivy2/jars/com.google.cloud_google-cloud-core-grpc-2.9.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-core-1.51.0.jar,file:///root/.ivy2/jars/com.google.api_gax-2.20.1.jar,file:///root/.ivy2/jars/com.google.api_gax-grpc-2.20.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-alts-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-grpclb-1.51.0.jar,file:///root/.ivy2/jars/org.conscrypt_conscrypt-openjdk-uber-2.5.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-1.51.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-credentials-1.13.0.jar,file:///root/.ivy2/jars/com.google.auth_google-auth-library-oauth2-http-1.13.0.jar,file:///root/.ivy2/jars/com.google.api_api-common-2.2.2.jar,file:///root/.ivy2/jars/javax.annotation_javax.annotation-api-1.3.2.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-api-0.31.1.jar,file:///root/.ivy2/jars/io.grpc_grpc-context-1.51.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-3.21.10.jar,file:///root/.ivy2/jars/com.google.protobuf_protobuf-java-util-3.21.10.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-common-protos-2.11.0.jar,file:///root/.ivy2/jars/org.threeten_threetenbp-1.6.4.jar,file:///root/.ivy2/jars/com.google.api.grpc_proto-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.google.api.grpc_gapic-google-cloud-storage-v2-2.16.0-alpha.jar,file:///root/.ivy2/jars/com.fasterxml.jackson.core_jackson-core-2.14.1.jar,file:///root/.ivy2/jars/com.google.code.findbugs_jsr305-3.0.2.jar,file:///root/.ivy2/jars/io.grpc_grpc-api-1.51.0.jar,file:///root/.ivy2/jars/io.grp
c_grpc-auth-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-stub-1.51.0.jar,file:///root/.ivy2/jars/org.checkerframework_checker-qual-3.28.0.jar,file:///root/.ivy2/jars/com.google.api.grpc_grpc-google-iam-v1-1.6.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-protobuf-lite-1.51.0.jar,file:///root/.ivy2/jars/com.google.android_annotations-4.1.1.4.jar,file:///root/.ivy2/jars/org.codehaus.mojo_animal-sniffer-annotations-1.22.jar,file:///root/.ivy2/jars/io.grpc_grpc-netty-shaded-1.51.0.jar,file:///root/.ivy2/jars/io.perfmark_perfmark-api-0.26.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-googleapis-1.51.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-xds-1.51.0.jar,file:///root/.ivy2/jars/io.opencensus_opencensus-proto-0.2.0.jar,file:///root/.ivy2/jars/io.grpc_grpc-services-1.51.0.jar,file:///root/.ivy2/jars/com.google.re2j_re2j-1.6.jar,file:///root/.ivy2/jars/dk.brics.automaton_automaton-1.11-8.jar,file:///root/.ivy2/jars/org.slf4j_slf4j-api-1.7.16.jar'),\n", - " ('spark.dataproc.sql.parquet.enableFooterCache', 'true'),\n", - " ('spark.driver.memory', '4g'),\n", - " ('spark.sql.warehouse.dir', 'file:/spark-warehouse'),\n", - " ('spark.yarn.executor.failuresValidityInterval', '1h'),\n", - " ('spark.app.id', 'application_1700703892135_0004'),\n", - " ('spark.yarn.am.memory', '640m'),\n", - " ('spark.yarn.historyServer.address',\n", - " 'hub-msca-bdp-dphub-students-rohitk-m:18080'),\n", - " ('spark.cores.max', '4'),\n", - " ('spark.executor.cores', '4'),\n", - " ('spark.eventLog.dir',\n", - " 'gs://dataproc-temp-us-central1-635155370842-uzamlpgc/73ad5d52-7eed-4ef0-a116-3e9fd02da220/spark-job-history'),\n", - " ('spark.jars.packages',\n", - " 'com.johnsnowlabs.nlp:spark-nlp_2.12:4.4.0,graphframes:graphframes:0.8.2-spark3.1-s_2.12'),\n", - " ('spark.executor.instances', '2'),\n", - " ('spark.dataproc.listeners',\n", - " 'com.google.cloud.spark.performance.DataprocMetricsListener'),\n", - " ('spark.sql.autoBroadcastJoinThreshold', '90m'),\n", - " ('spark.serializer.objectStreamReset', 
'100'),\n", - " ('spark.submit.deployMode', 'client'),\n", - " ('spark.driver.appUIAddress',\n", - " 'http://hub-msca-bdp-dphub-students-rohitk-m.c.msca-bdp-student-ap.internal:42407'),\n", - " ('spark.sql.cbo.joinReorder.enabled', 'true'),\n", - " ('spark.shuffle.service.enabled', 'true'),\n", - " ('spark.scheduler.mode', 'FAIR'),\n", - " ('spark.sql.adaptive.enabled', 'true'),\n", - " ('spark.yarn.jars', 'local:/usr/lib/spark/jars/*'),\n", - " ('spark.scheduler.minRegisteredResourcesRatio', '0.0'),\n", - " ('spark.master', 'yarn'),\n", - " ('spark.ui.port', '0'),\n", - " ('spark.rpc.message.maxSize', '512'),\n", - " ('spark.rdd.compress', 'True'),\n", - " ('spark.task.maxFailures', '10'),\n", - " ('spark.yarn.isPython', 'true'),\n", - " ('spark.dynamicAllocation.enabled', 'true'),\n", - " ('spark.ui.showConsoleProgress', 'true')]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# imports and Spark session setup\n", - "from pyspark.sql import SparkSession\n", - "from pyspark.sql import functions as F\n", - "\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "\n", - "# Resource settings must be given to the builder before the session exists. Spark clones the\n", - "# SparkConf when the context starts, so calling setAll() on spark.sparkContext._conf afterwards\n", - "# only changes what getConf() reports; the running application never sees it.\n", - "spark = (\n", - "    SparkSession.builder\n", - "    .appName('unsupervised')\n", - "    .config('spark.executor.memory', '4g')\n", - "    .config('spark.executor.cores', '4')\n", - "    .config('spark.cores.max', '4')\n", - "    .config('spark.driver.memory', '4g')\n", - "    .getOrCreate()\n", - ")\n", - "\n", - "# NOTE: getOrCreate() reuses a live session if the kernel already has one, in which case the\n", - "# static settings above are ignored - restart the kernel before running this cell.\n", - "conf = spark.sparkContext.getConf()\n", - "\n", - "# print the effective Spark configuration\n", - "conf.getAll()" - ] - }, - { - "cell_type": "markdown", - "id": "6e8338f7-2c56-4e05-b6b3-78577485dae4", - "metadata": {}, - "source": [ - "### Reading in cleaned data, partitioning" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b98781e4-a2cd-4da2-aa73-70de31956265", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - }, - {
- "name": "stdout", - "output_type": "stream", - "text": [ - "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", - "| ID| start_timestamp| end_timestamp|seconds|miles|pickup_tract|dropoff_tract|pickup_area|dropoff_area|Fare|Tip|total| pickup_lat| pickup_lon| dropoff_lat| dropoff_lon|month|day_of_month|hour|day|\n", - "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", - "|625e77ae6e0ff7191...|2018-11-06 19:00:00|2018-11-06 19:15:00| 1142| 5.8| 17031063400| 17031010400| 6| 1|12.5| 0| 15.0|41.9346591566|-87.6467297286| 42.004764559| -87.659122427| 11| 6| 19| 3|\n", - "|62945fdb2e70957f0...|2018-11-06 19:00:00|2018-11-06 19:00:00| 341| 1.2| 17031081800| 17031833000| 8| 28| 5.0| 0| 7.5|41.8932163595|-87.6378442095|41.8852813201|-87.6572331997| 11| 6| 19| 3|\n", - "|6dc03f91e4480d237...|2018-11-06 19:00:00|2018-11-06 19:00:00| 558| 1.2| 17031070400| 17031061500| 7| 6| 7.5| 0| 10.3|41.9289672664|-87.6561568309|41.9452823311|-87.6615450961| 11| 6| 19| 3|\n", - "|773894079a526afa1...|2018-11-06 19:00:00|2018-11-06 19:30:00| 1047| 2.8| 17031832200| 17031062100| 22| 6|10.0| 2| 14.5|41.9204515116|-87.6799547678|41.9426918444|-87.6517705068| 11| 6| 19| 3|\n", - "|7acf0a7f2edfbe546...|2018-11-06 19:00:00|2018-11-06 19:00:00| 502| 1.3| 17031839100| 17031081700| 32| 8| 2.5| 0| 5.0|41.8809944707|-87.6327464887|41.8920421365|-87.6318639497| 11| 6| 19| 3|\n", - "+--------------------+-------------------+-------------------+-------+-----+------------+-------------+-----------+------------+----+---+-----+-------------+--------------+-------------+--------------+-----+------------+----+---+\n", - "only showing top 5 rows\n", -
"\n" - ] - } - ], - "source": [ - "# Read the cleaned rideshare CSVs for each year we keep and stack them into one DataFrame.\n", - "# 2020 is left out on purpose: covid will affect the performance of our model.\n", - "RIDES_DIR = \"gs://msca-bdp-student-gcs/bdp-rideshare-project/rideshare/processed_data\"\n", - "\n", - "\n", - "def read_rides(year):\n", - "    \"\"\"Load one year's cleaned rides CSV (header row, schema inferred by Spark).\"\"\"\n", - "    return spark.read.csv(\"{}/rides_{}.csv\".format(RIDES_DIR, year), inferSchema=True, header=True)\n", - "\n", - "\n", - "df_2018 = read_rides(2018)\n", - "df_2019 = read_rides(2019)\n", - "df_2021 = read_rides(2021)\n", - "df_2022 = read_rides(2022)\n", - "\n", - "# drop the columns that are new in 2023 before combining the years\n", - "df_2023 = read_rides(2023).drop('Shared Trip Match', 'Percent Time Chicago', 'Percent Distance Chicago')\n", - "\n", - "# unionByName lines columns up by name; union() is positional and would silently put values\n", - "# under the wrong header if any year's CSV had its columns in a different order\n", - "df_all = (\n", - "    df_2018\n", - "    .unionByName(df_2019)\n", - "    .unionByName(df_2021)\n", - "    .unionByName(df_2022)\n", - "    .unionByName(df_2023)\n", - ")\n", - "df_all.show(5)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "978c6a20-72a3-4a7e-a588-4e1ad2386915", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Partitions: 544\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 32:=====================================================>(538 + 6) / 544]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----------+------+\n", - "|partitionId| count|\n", - "+-----------+------+\n", - "| 42|305254|\n", - "| 41|305316|\n", - "| 40|305420|\n", - "| 38|305471|\n", - "| 39|305480|\n", - "| 37|305618|\n", - "| 36|305676|\n", - "| 35|305871|\n", - "| 34|305890|\n", - "| 33|305962|\n", -
"| 32|305971|\n", - "| 31|306010|\n", - "| 29|306031|\n", - "| 30|306038|\n", - "| 28|306086|\n", - "| 27|306127|\n", - "| 26|306402|\n", - "| 25|306467|\n", - "| 24|306633|\n", - "| 23|306731|\n", - "| 22|307226|\n", - "| 243|328837|\n", - "| 242|328975|\n", - "| 241|329131|\n", - "| 240|329163|\n", - "| 239|329209|\n", - "| 237|329245|\n", - "| 238|329263|\n", - "| 235|329263|\n", - "| 234|329311|\n", - "| 236|329315|\n", - "| 232|329332|\n", - "| 233|329344|\n", - "| 231|329373|\n", - "| 228|329389|\n", - "| 229|329390|\n", - "| 227|329399|\n", - "| 226|329410|\n", - "| 225|329410|\n", - "| 224|329418|\n", - "| 230|329427|\n", - "| 223|329428|\n", - "| 220|329461|\n", - "| 222|329481|\n", - "| 221|329505|\n", - "| 217|329507|\n", - "| 218|329513|\n", - "| 219|329519|\n", - "| 216|329523|\n", - "| 214|329533|\n", - "| 213|329555|\n", - "| 215|329574|\n", - "| 211|329587|\n", - "| 212|329591|\n", - "| 208|329607|\n", - "| 210|329623|\n", - "| 206|329624|\n", - "| 209|329630|\n", - "| 207|329633|\n", - "| 205|329646|\n", - "| 202|329654|\n", - "| 204|329673|\n", - "| 203|329678|\n", - "| 194|329704|\n", - "| 201|329708|\n", - "| 200|329712|\n", - "| 191|329717|\n", - "| 189|329728|\n", - "| 188|329730|\n", - "| 199|329732|\n", - "| 193|329732|\n", - "| 198|329739|\n", - "| 190|329746|\n", - "| 195|329748|\n", - "| 197|329786|\n", - "| 186|329786|\n", - "| 196|329786|\n", - "| 187|329787|\n", - "| 192|329793|\n", - "| 183|329804|\n", - "| 184|329805|\n", - "| 179|329807|\n", - "| 182|329814|\n", - "| 181|329819|\n", - "| 185|329843|\n", - "| 178|329849|\n", - "| 177|329861|\n", - "| 180|329865|\n", - "| 172|329866|\n", - "| 173|329871|\n", - "| 171|329879|\n", - "| 175|329889|\n", - "| 174|329892|\n", - "| 176|329900|\n", - "| 170|329937|\n", - "| 168|329956|\n", - "| 162|329961|\n", - "| 167|329962|\n", - "| 164|329965|\n", - "| 169|329974|\n", - "| 165|329980|\n", - "| 163|329985|\n", - "| 159|329993|\n", - "| 166|330000|\n", - "| 158|330005|\n", - "| 
161|330014|\n", - "| 157|330021|\n", - "| 160|330027|\n", - "| 156|330059|\n", - "| 154|330066|\n", - "| 147|330070|\n", - "| 153|330083|\n", - "| 150|330084|\n", - "| 155|330084|\n", - "| 146|330089|\n", - "| 152|330090|\n", - "| 149|330099|\n", - "| 151|330101|\n", - "| 148|330103|\n", - "| 144|330116|\n", - "| 145|330129|\n", - "| 140|330133|\n", - "| 143|330135|\n", - "| 141|330139|\n", - "| 142|330148|\n", - "| 139|330159|\n", - "| 135|330178|\n", - "| 137|330181|\n", - "| 132|330186|\n", - "| 138|330189|\n", - "| 136|330205|\n", - "| 129|330208|\n", - "| 133|330219|\n", - "| 125|330220|\n", - "| 128|330227|\n", - "| 134|330230|\n", - "| 127|330236|\n", - "| 130|330243|\n", - "| 124|330245|\n", - "| 117|330249|\n", - "| 131|330252|\n", - "| 121|330255|\n", - "| 122|330258|\n", - "| 126|330268|\n", - "| 123|330270|\n", - "| 118|330275|\n", - "| 115|330282|\n", - "| 113|330282|\n", - "| 120|330296|\n", - "| 112|330297|\n", - "| 119|330306|\n", - "| 114|330308|\n", - "| 104|330338|\n", - "| 116|330339|\n", - "| 109|330355|\n", - "| 105|330363|\n", - "| 111|330366|\n", - "| 110|330372|\n", - "| 108|330383|\n", - "| 107|330385|\n", - "| 106|330402|\n", - "| 96|330413|\n", - "| 102|330415|\n", - "| 100|330418|\n", - "| 98|330418|\n", - "| 101|330419|\n", - "| 94|330425|\n", - "| 97|330426|\n", - "| 99|330430|\n", - "| 103|330430|\n", - "| 95|330436|\n", - "| 90|330456|\n", - "| 92|330470|\n", - "| 88|330471|\n", - "| 91|330474|\n", - "| 93|330477|\n", - "| 87|330494|\n", - "| 86|330501|\n", - "| 89|330512|\n", - "| 84|330524|\n", - "| 82|330550|\n", - "| 80|330570|\n", - "| 85|330570|\n", - "| 81|330579|\n", - "| 83|330585|\n", - "| 78|330622|\n", - "| 79|330625|\n", - "| 76|330625|\n", - "| 75|330642|\n", - "| 77|330646|\n", - "| 71|330651|\n", - "| 74|330653|\n", - "| 73|330667|\n", - "| 70|330690|\n", - "| 65|330704|\n", - "| 72|330709|\n", - "| 67|330721|\n", - "| 66|330724|\n", - "| 69|330737|\n", - "| 62|330758|\n", - "| 64|330762|\n", - "| 63|330762|\n", - "| 
68|330766|\n", - "| 60|330782|\n", - "| 59|330784|\n", - "| 56|330801|\n", - "| 57|330805|\n", - "| 61|330807|\n", - "| 58|330837|\n", - "| 53|330868|\n", - "| 55|330869|\n", - "| 54|330885|\n", - "| 52|330918|\n", - "| 50|330944|\n", - "| 51|330963|\n", - "| 49|331028|\n", - "| 48|331034|\n", - "| 47|331050|\n", - "| 46|331114|\n", - "| 45|331284|\n", - "| 44|331416|\n", - "| 543|364094|\n", - "| 542|364374|\n", - "| 541|364493|\n", - "| 537|364581|\n", - "| 538|364599|\n", - "| 539|364616|\n", - "| 540|364617|\n", - "| 536|364654|\n", - "| 534|364709|\n", - "| 535|364756|\n", - "| 532|364784|\n", - "| 533|364810|\n", - "| 529|364899|\n", - "| 530|364903|\n", - "| 531|364944|\n", - "| 528|364957|\n", - "| 527|364961|\n", - "| 524|364971|\n", - "| 525|364988|\n", - "| 526|365006|\n", - "| 522|365011|\n", - "| 523|365051|\n", - "| 521|365057|\n", - "| 520|365079|\n", - "| 518|365083|\n", - "| 517|365090|\n", - "| 519|365097|\n", - "| 516|365122|\n", - "| 514|365165|\n", - "| 515|365179|\n", - "| 513|365224|\n", - "| 509|365252|\n", - "| 506|365253|\n", - "| 511|365255|\n", - "| 508|365272|\n", - "| 510|365277|\n", - "| 512|365278|\n", - "| 507|365302|\n", - "| 505|365347|\n", - "| 502|365377|\n", - "| 503|365394|\n", - "| 504|365395|\n", - "| 501|365409|\n", - "| 500|365431|\n", - "| 498|365447|\n", - "| 499|365454|\n", - "| 497|365519|\n", - "| 496|365528|\n", - "| 495|365536|\n", - "| 492|365541|\n", - "| 489|365547|\n", - "| 488|365552|\n", - "| 487|365554|\n", - "| 490|365569|\n", - "| 493|365574|\n", - "| 484|365576|\n", - "| 494|365595|\n", - "| 485|365602|\n", - "| 486|365622|\n", - "| 491|365622|\n", - "| 483|365650|\n", - "| 482|365684|\n", - "| 481|365705|\n", - "| 479|365750|\n", - "| 478|365773|\n", - "| 477|365793|\n", - "| 480|365801|\n", - "| 475|365806|\n", - "| 474|365806|\n", - "| 473|365828|\n", - "| 476|365846|\n", - "| 472|365909|\n", - "| 471|365965|\n", - "| 470|365975|\n", - "| 469|366026|\n", - "| 466|366051|\n", - "| 467|366057|\n", - "| 
468|366080|\n", - "| 464|366105|\n", - "| 465|366117|\n", - "| 462|366150|\n", - "| 463|366160|\n", - "| 458|366193|\n", - "| 461|366200|\n", - "| 460|366214|\n", - "| 459|366217|\n", - "| 456|366297|\n", - "| 457|366320|\n", - "| 455|366371|\n", - "| 454|366383|\n", - "| 453|366422|\n", - "| 452|366461|\n", - "| 451|366589|\n", - "| 450|366617|\n", - "| 449|366758|\n", - "| 448|366799|\n", - "| 447|366883|\n", - "| 446|366901|\n", - "| 445|366940|\n", - "| 444|367122|\n", - "| 21|380513|\n", - "| 20|380565|\n", - "| 19|380749|\n", - "| 18|381028|\n", - "| 17|381069|\n", - "| 16|381243|\n", - "| 15|381263|\n", - "| 14|381438|\n", - "| 13|381470|\n", - "| 12|381544|\n", - "| 11|381646|\n", - "| 10|381711|\n", - "| 8|381721|\n", - "| 9|381753|\n", - "| 7|381759|\n", - "| 6|381763|\n", - "| 5|381783|\n", - "| 4|381827|\n", - "| 3|381971|\n", - "| 1|382022|\n", - "| 2|382029|\n", - "| 0|382095|\n", - "| 342|420259|\n", - "| 343|420346|\n", - "| 341|420485|\n", - "| 340|420525|\n", - "| 339|420707|\n", - "| 336|421031|\n", - "| 337|421040|\n", - "| 338|421052|\n", - "| 334|421107|\n", - "| 335|421142|\n", - "| 333|421374|\n", - "| 330|421440|\n", - "| 332|421479|\n", - "| 331|421531|\n", - "| 327|421574|\n", - "| 328|421603|\n", - "| 329|421610|\n", - "| 326|421612|\n", - "| 322|421670|\n", - "| 320|421675|\n", - "| 325|421679|\n", - "| 324|421681|\n", - "| 319|421687|\n", - "| 323|421687|\n", - "| 321|421699|\n", - "| 318|421751|\n", - "| 315|421832|\n", - "| 310|421867|\n", - "| 316|421897|\n", - "| 312|421903|\n", - "| 317|421911|\n", - "| 314|421918|\n", - "| 313|421920|\n", - "| 311|421950|\n", - "| 309|421972|\n", - "| 307|421988|\n", - "| 308|422019|\n", - "| 305|422072|\n", - "| 303|422083|\n", - "| 306|422091|\n", - "| 304|422095|\n", - "| 302|422097|\n", - "| 298|422103|\n", - "| 300|422114|\n", - "| 301|422116|\n", - "| 295|422134|\n", - "| 296|422155|\n", - "| 299|422155|\n", - "| 290|422185|\n", - "| 297|422193|\n", - "| 294|422194|\n", - "| 292|422207|\n", 
- "| 291|422218|\n", - "| 293|422236|\n", - "| 288|422238|\n", - "| 286|422255|\n", - "| 289|422265|\n", - "| 287|422266|\n", - "| 285|422305|\n", - "| 283|422307|\n", - "| 284|422346|\n", - "| 282|422350|\n", - "| 281|422354|\n", - "| 280|422372|\n", - "| 279|422415|\n", - "| 278|422498|\n", - "| 277|422501|\n", - "| 276|422508|\n", - "| 275|422549|\n", - "| 274|422557|\n", - "| 273|422591|\n", - "| 272|422625|\n", - "| 270|422634|\n", - "| 269|422671|\n", - "| 268|422673|\n", - "| 271|422692|\n", - "| 267|422694|\n", - "| 265|422761|\n", - "| 262|422777|\n", - "| 260|422788|\n", - "| 263|422795|\n", - "| 266|422803|\n", - "| 264|422807|\n", - "| 258|422838|\n", - "| 259|422839|\n", - "| 261|422841|\n", - "| 257|422852|\n", - "| 256|422891|\n", - "| 252|422904|\n", - "| 255|422925|\n", - "| 254|422986|\n", - "| 253|423003|\n", - "| 250|423197|\n", - "| 251|423202|\n", - "| 248|423231|\n", - "| 249|423262|\n", - "| 246|423376|\n", - "| 247|423402|\n", - "| 245|423403|\n", - "| 244|423762|\n", - "| 43|457702|\n", - "| 443|569570|\n", - "| 442|570154|\n", - "| 441|570301|\n", - "| 440|570372|\n", - "| 439|570572|\n", - "| 438|570655|\n", - "| 436|570763|\n", - "| 437|570781|\n", - "| 434|570870|\n", - "| 435|570872|\n", - "| 433|570953|\n", - "| 432|570979|\n", - "| 431|571069|\n", - "| 429|571096|\n", - "| 430|571097|\n", - "| 428|571127|\n", - "| 427|571153|\n", - "| 426|571185|\n", - "| 425|571201|\n", - "| 424|571286|\n", - "| 423|571425|\n", - "| 422|571449|\n", - "| 417|571506|\n", - "| 420|571528|\n", - "| 421|571532|\n", - "| 419|571553|\n", - "| 418|571585|\n", - "| 416|571595|\n", - "| 414|571645|\n", - "| 415|571657|\n", - "| 413|571742|\n", - "| 412|571766|\n", - "| 411|571796|\n", - "| 409|571842|\n", - "| 410|571847|\n", - "| 407|571874|\n", - "| 408|571913|\n", - "| 406|571925|\n", - "| 405|571966|\n", - "| 404|571983|\n", - "| 402|571993|\n", - "| 403|572020|\n", - "| 401|572123|\n", - "| 397|572181|\n", - "| 400|572182|\n", - "| 399|572183|\n", - "| 
398|572189|\n", - "| 396|572212|\n", - "| 395|572244|\n", - "| 393|572249|\n", - "| 394|572276|\n", - "| 392|572302|\n", - "| 391|572344|\n", - "| 390|572361|\n", - "| 389|572382|\n", - "| 388|572394|\n", - "| 387|572428|\n", - "| 386|572438|\n", - "| 385|572493|\n", - "| 383|572545|\n", - "| 384|572565|\n", - "| 382|572569|\n", - "| 381|572600|\n", - "| 380|572604|\n", - "| 379|572634|\n", - "| 378|572647|\n", - "| 376|572742|\n", - "| 375|572742|\n", - "| 377|572755|\n", - "| 374|572798|\n", - "| 372|572800|\n", - "| 373|572816|\n", - "| 371|572868|\n", - "| 370|572895|\n", - "| 369|572907|\n", - "| 368|572924|\n", - "| 367|572957|\n", - "| 366|573022|\n", - "| 364|573102|\n", - "| 365|573104|\n", - "| 362|573132|\n", - "| 363|573145|\n", - "| 361|573173|\n", - "| 360|573187|\n", - "| 358|573262|\n", - "| 359|573270|\n", - "| 357|573334|\n", - "| 356|573372|\n", - "| 355|573425|\n", - "| 354|573556|\n", - "| 353|573584|\n", - "| 352|573658|\n", - "| 351|573676|\n", - "| 350|573781|\n", - "| 349|573977|\n", - "| 347|574013|\n", - "| 348|574040|\n", - "| 346|574185|\n", - "| 345|574318|\n", - "| 344|574727|\n", - "+-----------+------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "def displaypartitions(df, max_rows=None):\n", - "    \"\"\"Print how many partitions `df` has and the number of records held by each one.\n", - "\n", - "    Partitions are listed in ascending order of record count.\n", - "\n", - "    Args:\n", - "        df: Spark DataFrame to inspect.\n", - "        max_rows: cap on how many partitions are listed; None (the default) lists all of them.\n", - "    \"\"\"\n", - "    num = df.rdd.getNumPartitions()\n", - "    print(\"Partitions:\", num)\n", - "    # tag every row with the id of the partition it lives in, then count rows per id\n", - "    counts = (\n", - "        df.withColumn(\"partitionId\", F.spark_partition_id())\n", - "        .groupBy(\"partitionId\")\n", - "        .count()\n", - "        .orderBy(F.asc(\"count\"))\n", - "    )\n", - "    counts.show(num if max_rows is None else min(num, max_rows))\n", - "\n", - "\n", - "displaypartitions(df_all)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "8c914559-481c-4dbe-8438-91eeb2795b54", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 35:=====================================================>(543 + 1)
/ 544]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Partitions: 600\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[Stage 38:=================================================> (565 + 32) / 600]\r" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "+-----------+------+\n", - "|partitionId| count|\n", - "+-----------+------+\n", - "| 407|362152|\n", - "| 404|362153|\n", - "| 403|362153|\n", - "| 398|362153|\n", - "| 406|362153|\n", - "| 405|362153|\n", - "| 408|362153|\n", - "| 399|362153|\n", - "| 136|362154|\n", - "| 137|362154|\n", - "| 400|362154|\n", - "| 397|362154|\n", - "| 396|362154|\n", - "| 409|362154|\n", - "| 135|362154|\n", - "| 535|362154|\n", - "| 138|362154|\n", - "| 401|362154|\n", - "| 69|362155|\n", - "| 140|362155|\n", - "| 537|362155|\n", - "| 108|362155|\n", - "| 539|362155|\n", - "| 146|362155|\n", - "| 538|362155|\n", - "| 534|362155|\n", - "| 402|362155|\n", - "| 148|362155|\n", - "| 536|362155|\n", - "| 106|362155|\n", - "| 410|362155|\n", - "| 151|362155|\n", - "| 107|362155|\n", - "| 70|362155|\n", - "| 105|362155|\n", - "| 147|362155|\n", - "| 144|362155|\n", - "| 134|362155|\n", - "| 139|362155|\n", - "| 150|362155|\n", - "| 84|362156|\n", - "| 395|362156|\n", - "| 92|362156|\n", - "| 143|362156|\n", - "| 160|362156|\n", - "| 394|362156|\n", - "| 411|362156|\n", - "| 109|362156|\n", - "| 72|362156|\n", - "| 145|362156|\n", - "| 93|362156|\n", - "| 94|362156|\n", - "| 532|362156|\n", - "| 142|362156|\n", - "| 68|362156|\n", - "| 71|362156|\n", - "| 101|362156|\n", - "| 412|362156|\n", - "| 529|362156|\n", - "| 149|362156|\n", - "| 531|362156|\n", - "| 152|362156|\n", - "| 530|362156|\n", - "| 67|362156|\n", - "| 89|362156|\n", - "| 141|362157|\n", - "| 85|362157|\n", - "| 97|362157|\n", - "| 416|362157|\n", - "| 130|362157|\n", - "| 419|362157|\n", - "| 111|362157|\n", - "| 90|362157|\n", - "| 98|362157|\n", - "| 540|362157|\n", - "| 82|362157|\n", - 
"| 75|362157|\n", - "| 133|362157|\n", - "| 79|362157|\n", - "| 91|362157|\n", - "| 317|362157|\n", - "| 414|362157|\n", - "| 66|362157|\n", - "| 153|362157|\n", - "| 162|362157|\n", - "| 315|362157|\n", - "| 102|362157|\n", - "| 413|362157|\n", - "| 83|362157|\n", - "| 438|362158|\n", - "| 393|362158|\n", - "| 77|362158|\n", - "| 96|362158|\n", - "| 527|362158|\n", - "| 163|362158|\n", - "| 87|362158|\n", - "| 322|362158|\n", - "| 100|362158|\n", - "| 103|362158|\n", - "| 81|362158|\n", - "| 64|362158|\n", - "| 80|362158|\n", - "| 112|362158|\n", - "| 320|362158|\n", - "| 418|362158|\n", - "| 417|362158|\n", - "| 415|362158|\n", - "| 95|362158|\n", - "| 321|362158|\n", - "| 73|362158|\n", - "| 427|362158|\n", - "| 65|362158|\n", - "| 533|362158|\n", - "| 524|362158|\n", - "| 390|362158|\n", - "| 76|362158|\n", - "| 319|362158|\n", - "| 525|362158|\n", - "| 161|362158|\n", - "| 318|362158|\n", - "| 316|362158|\n", - "| 164|362158|\n", - "| 99|362158|\n", - "| 420|362158|\n", - "| 131|362158|\n", - "| 88|362158|\n", - "| 74|362158|\n", - "| 104|362158|\n", - "| 541|362158|\n", - "| 110|362158|\n", - "| 526|362158|\n", - "| 528|362158|\n", - "| 86|362158|\n", - "| 78|362159|\n", - "| 132|362159|\n", - "| 583|362159|\n", - "| 545|362159|\n", - "| 314|362159|\n", - "| 449|362159|\n", - "| 543|362159|\n", - "| 441|362159|\n", - "| 430|362159|\n", - "| 434|362159|\n", - "| 439|362159|\n", - "| 580|362159|\n", - "| 129|362159|\n", - "| 542|362159|\n", - "| 431|362159|\n", - "| 579|362159|\n", - "| 159|362159|\n", - "| 113|362159|\n", - "| 165|362159|\n", - "| 426|362159|\n", - "| 157|362159|\n", - "| 156|362159|\n", - "| 578|362159|\n", - "| 567|362159|\n", - "| 118|362159|\n", - "| 154|362159|\n", - "| 581|362159|\n", - "| 433|362159|\n", - "| 566|362159|\n", - "| 423|362159|\n", - "| 155|362159|\n", - "| 158|362159|\n", - "| 440|362159|\n", - "| 586|362160|\n", - "| 585|362160|\n", - "| 597|362160|\n", - "| 392|362160|\n", - "| 62|362160|\n", - "| 3|362160|\n", - "| 
425|362160|\n", - "| 429|362160|\n", - "| 116|362160|\n", - "| 324|362160|\n", - "| 584|362160|\n", - "| 180|362160|\n", - "| 596|362160|\n", - "| 312|362160|\n", - "| 389|362160|\n", - "| 565|362160|\n", - "| 544|362160|\n", - "| 588|362160|\n", - "| 448|362160|\n", - "| 166|362160|\n", - "| 569|362160|\n", - "| 446|362160|\n", - "| 114|362160|\n", - "| 63|362160|\n", - "| 385|362160|\n", - "| 117|362160|\n", - "| 428|362160|\n", - "| 548|362160|\n", - "| 595|362160|\n", - "| 328|362160|\n", - "| 587|362160|\n", - "| 323|362160|\n", - "| 559|362160|\n", - "| 386|362160|\n", - "| 432|362160|\n", - "| 582|362160|\n", - "| 391|362160|\n", - "| 561|362160|\n", - "| 309|362160|\n", - "| 421|362160|\n", - "| 2|362160|\n", - "| 547|362160|\n", - "| 598|362160|\n", - "| 424|362160|\n", - "| 435|362160|\n", - "| 568|362160|\n", - "| 115|362160|\n", - "| 313|362160|\n", - "| 387|362160|\n", - "| 442|362160|\n", - "| 437|362160|\n", - "| 13|362160|\n", - "| 436|362160|\n", - "| 444|362160|\n", - "| 571|362160|\n", - "| 589|362160|\n", - "| 422|362160|\n", - "| 26|362161|\n", - "| 8|362161|\n", - "| 332|362161|\n", - "| 577|362161|\n", - "| 443|362161|\n", - "| 219|362161|\n", - "| 591|362161|\n", - "| 376|362161|\n", - "| 178|362161|\n", - "| 447|362161|\n", - "| 546|362161|\n", - "| 564|362161|\n", - "| 337|362161|\n", - "| 4|362161|\n", - "| 372|362161|\n", - "| 59|362161|\n", - "| 451|362161|\n", - "| 450|362161|\n", - "| 27|362161|\n", - "| 570|362161|\n", - "| 217|362161|\n", - "| 329|362161|\n", - "| 560|362161|\n", - "| 326|362161|\n", - "| 388|362161|\n", - "| 572|362161|\n", - "| 331|362161|\n", - "| 24|362161|\n", - "| 167|362161|\n", - "| 25|362161|\n", - "| 15|362161|\n", - "| 120|362161|\n", - "| 336|362161|\n", - "| 181|362161|\n", - "| 552|362161|\n", - "| 338|362161|\n", - "| 325|362161|\n", - "| 377|362161|\n", - "| 179|362161|\n", - "| 11|362161|\n", - "| 523|362161|\n", - "| 127|362161|\n", - "| 61|362161|\n", - "| 311|362161|\n", - "| 310|362161|\n", - "| 
119|362161|\n", - "| 562|362161|\n", - "| 128|362161|\n", - "| 549|362161|\n", - "| 7|362161|\n", - "| 361|362161|\n", - "| 594|362161|\n", - "| 558|362161|\n", - "| 445|362161|\n", - "| 22|362162|\n", - "| 521|362162|\n", - "| 333|362162|\n", - "| 453|362162|\n", - "| 48|362162|\n", - "| 19|362162|\n", - "| 168|362162|\n", - "| 576|362162|\n", - "| 363|362162|\n", - "| 32|362162|\n", - "| 362|362162|\n", - "| 599|362162|\n", - "| 122|362162|\n", - "| 573|362162|\n", - "| 384|362162|\n", - "| 575|362162|\n", - "| 126|362162|\n", - "| 379|362162|\n", - "| 374|362162|\n", - "| 20|362162|\n", - "| 327|362162|\n", - "| 18|362162|\n", - "| 28|362162|\n", - "| 593|362162|\n", - "| 35|362162|\n", - "| 330|362162|\n", - "| 5|362162|\n", - "| 381|362162|\n", - "| 216|362162|\n", - "| 380|362162|\n", - "| 1|362162|\n", - "| 339|362162|\n", - "| 60|362162|\n", - "| 21|362162|\n", - "| 550|362162|\n", - "| 378|362162|\n", - "| 228|362162|\n", - "| 12|362162|\n", - "| 9|362162|\n", - "| 17|362162|\n", - "| 382|362162|\n", - "| 121|362162|\n", - "| 221|362162|\n", - "| 590|362162|\n", - "| 375|362162|\n", - "| 522|362162|\n", - "| 373|362162|\n", - "| 14|362162|\n", - "| 182|362162|\n", - "| 563|362162|\n", - "| 383|362162|\n", - "| 218|362162|\n", - "| 10|362162|\n", - "| 16|362162|\n", - "| 23|362162|\n", - "| 40|362163|\n", - "| 57|362163|\n", - "| 340|362163|\n", - "| 56|362163|\n", - "| 41|362163|\n", - "| 33|362163|\n", - "| 557|362163|\n", - "| 359|362163|\n", - "| 6|362163|\n", - "| 234|362163|\n", - "| 50|362163|\n", - "| 171|362163|\n", - "| 177|362163|\n", - "| 556|362163|\n", - "| 173|362163|\n", - "| 229|362163|\n", - "| 176|362163|\n", - "| 520|362163|\n", - "| 49|362163|\n", - "| 206|362163|\n", - "| 205|362163|\n", - "| 452|362163|\n", - "| 308|362163|\n", - "| 574|362163|\n", - "| 592|362163|\n", - "| 334|362163|\n", - "| 364|362163|\n", - "| 125|362163|\n", - "| 455|362163|\n", - "| 204|362163|\n", - "| 169|362163|\n", - "| 554|362163|\n", - "| 215|362163|\n", 
- "| 454|362163|\n", - "| 123|362163|\n", - "| 42|362163|\n", - "| 466|362163|\n", - "| 172|362163|\n", - "| 170|362163|\n", - "| 211|362163|\n", - "| 208|362163|\n", - "| 36|362163|\n", - "| 551|362163|\n", - "| 55|362163|\n", - "| 553|362163|\n", - "| 220|362163|\n", - "| 366|362163|\n", - "| 214|362163|\n", - "| 29|362163|\n", - "| 335|362163|\n", - "| 207|362163|\n", - "| 174|362163|\n", - "| 341|362163|\n", - "| 210|362163|\n", - "| 37|362163|\n", - "| 124|362163|\n", - "| 0|362163|\n", - "| 365|362163|\n", - "| 39|362163|\n", - "| 519|362164|\n", - "| 52|362164|\n", - "| 297|362164|\n", - "| 200|362164|\n", - "| 471|362164|\n", - "| 555|362164|\n", - "| 175|362164|\n", - "| 30|362164|\n", - "| 222|362164|\n", - "| 368|362164|\n", - "| 31|362164|\n", - "| 305|362164|\n", - "| 295|362164|\n", - "| 465|362164|\n", - "| 51|362164|\n", - "| 209|362164|\n", - "| 371|362164|\n", - "| 54|362164|\n", - "| 213|362164|\n", - "| 230|362164|\n", - "| 469|362164|\n", - "| 203|362164|\n", - "| 201|362164|\n", - "| 342|362164|\n", - "| 473|362164|\n", - "| 53|362164|\n", - "| 199|362164|\n", - "| 223|362164|\n", - "| 294|362164|\n", - "| 183|362164|\n", - "| 456|362164|\n", - "| 34|362164|\n", - "| 212|362164|\n", - "| 467|362164|\n", - "| 358|362164|\n", - "| 360|362164|\n", - "| 367|362164|\n", - "| 291|362164|\n", - "| 58|362164|\n", - "| 184|362164|\n", - "| 475|362164|\n", - "| 468|362164|\n", - "| 44|362165|\n", - "| 307|362165|\n", - "| 462|362165|\n", - "| 370|362165|\n", - "| 235|362165|\n", - "| 285|362165|\n", - "| 518|362165|\n", - "| 343|362165|\n", - "| 301|362165|\n", - "| 224|362165|\n", - "| 227|362165|\n", - "| 292|362165|\n", - "| 190|362165|\n", - "| 300|362165|\n", - "| 472|362165|\n", - "| 470|362165|\n", - "| 202|362165|\n", - "| 302|362165|\n", - "| 357|362165|\n", - "| 298|362165|\n", - "| 197|362165|\n", - "| 187|362165|\n", - "| 474|362165|\n", - "| 369|362165|\n", - "| 186|362165|\n", - "| 293|362165|\n", - "| 236|362165|\n", - "| 185|362165|\n", 
- "| 38|362165|\n", - "| 306|362165|\n", - "| 43|362165|\n", - "| 231|362165|\n", - "| 226|362165|\n", - "| 232|362165|\n", - "| 233|362165|\n", - "| 188|362165|\n", - "| 304|362165|\n", - "| 296|362165|\n", - "| 47|362165|\n", - "| 464|362165|\n", - "| 46|362165|\n", - "| 476|362165|\n", - "| 457|362165|\n", - "| 517|362165|\n", - "| 460|362166|\n", - "| 461|362166|\n", - "| 348|362166|\n", - "| 241|362166|\n", - "| 350|362166|\n", - "| 237|362166|\n", - "| 477|362166|\n", - "| 458|362166|\n", - "| 286|362166|\n", - "| 463|362166|\n", - "| 225|362166|\n", - "| 189|362166|\n", - "| 344|362166|\n", - "| 198|362166|\n", - "| 486|362166|\n", - "| 191|362166|\n", - "| 45|362166|\n", - "| 356|362166|\n", - "| 482|362166|\n", - "| 299|362166|\n", - "| 287|362166|\n", - "| 303|362166|\n", - "| 459|362166|\n", - "| 238|362166|\n", - "| 278|362167|\n", - "| 240|362167|\n", - "| 478|362167|\n", - "| 347|362167|\n", - "| 508|362167|\n", - "| 290|362167|\n", - "| 289|362167|\n", - "| 288|362167|\n", - "| 346|362167|\n", - "| 351|362167|\n", - "| 349|362167|\n", - "| 196|362167|\n", - "| 283|362167|\n", - "| 193|362167|\n", - "| 239|362167|\n", - "| 192|362167|\n", - "| 480|362167|\n", - "| 284|362167|\n", - "| 483|362167|\n", - "| 506|362168|\n", - "| 280|362168|\n", - "| 267|362168|\n", - "| 516|362168|\n", - "| 353|362168|\n", - "| 481|362168|\n", - "| 194|362168|\n", - "| 269|362168|\n", - "| 265|362168|\n", - "| 484|362168|\n", - "| 263|362168|\n", - "| 485|362168|\n", - "| 262|362168|\n", - "| 354|362168|\n", - "| 509|362168|\n", - "| 479|362168|\n", - "| 282|362168|\n", - "| 264|362168|\n", - "| 268|362168|\n", - "| 507|362168|\n", - "| 279|362168|\n", - "| 355|362168|\n", - "| 345|362168|\n", - "| 512|362168|\n", - "| 243|362168|\n", - "| 515|362168|\n", - "| 242|362168|\n", - "| 281|362168|\n", - "| 513|362168|\n", - "| 487|362168|\n", - "| 510|362168|\n", - "| 352|362168|\n", - "| 488|362168|\n", - "| 270|362169|\n", - "| 514|362169|\n", - "| 511|362169|\n", - "| 
261|362169|\n", - "| 245|362169|\n", - "| 195|362169|\n", - "| 258|362170|\n", - "| 276|362170|\n", - "| 503|362170|\n", - "| 273|362170|\n", - "| 244|362170|\n", - "| 277|362170|\n", - "| 274|362170|\n", - "| 489|362170|\n", - "| 260|362170|\n", - "| 275|362170|\n", - "| 247|362170|\n", - "| 271|362170|\n", - "| 246|362170|\n", - "| 272|362170|\n", - "| 266|362170|\n", - "| 490|362170|\n", - "| 250|362171|\n", - "| 504|362171|\n", - "| 491|362171|\n", - "| 505|362171|\n", - "| 249|362171|\n", - "| 492|362171|\n", - "| 248|362171|\n", - "| 493|362172|\n", - "| 259|362172|\n", - "| 502|362172|\n", - "| 501|362173|\n", - "| 254|362173|\n", - "| 253|362173|\n", - "| 251|362173|\n", - "| 494|362174|\n", - "| 255|362174|\n", - "| 252|362174|\n", - "| 495|362174|\n", - "| 256|362174|\n", - "| 500|362174|\n", - "| 497|362175|\n", - "| 257|362175|\n", - "| 496|362176|\n", - "| 499|362176|\n", - "| 498|362177|\n", - "+-----------+------+\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " \r" - ] - } - ], - "source": [ - "# repartitioning to 600 partitions, seems to be balanced now. 
\n", - "df_all = df_all.repartition(600)\n", - "displaypartitions(df_all)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4b98e4e2-0cf7-406d-8b64-b47e5829e40e", - "metadata": {}, - "outputs": [], - "source": [ - "# we will need a year column in this model:\n", - "df_all = df_all.withColumn('year', F.year(df_all.start_timestamp))" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "7fa0c27a-245a-4d9b-bbb9-785c828a3317", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "root\n", - " |-- ID: string (nullable = true)\n", - " |-- start_timestamp: timestamp (nullable = true)\n", - " |-- end_timestamp: timestamp (nullable = true)\n", - " |-- seconds: integer (nullable = true)\n", - " |-- miles: double (nullable = true)\n", - " |-- pickup_tract: long (nullable = true)\n", - " |-- dropoff_tract: long (nullable = true)\n", - " |-- pickup_area: integer (nullable = true)\n", - " |-- dropoff_area: integer (nullable = true)\n", - " |-- Fare: double (nullable = true)\n", - " |-- Tip: integer (nullable = true)\n", - " |-- total: double (nullable = true)\n", - " |-- pickup_lat: double (nullable = true)\n", - " |-- pickup_lon: double (nullable = true)\n", - " |-- dropoff_lat: double (nullable = true)\n", - " |-- dropoff_lon: string (nullable = true)\n", - " |-- month: integer (nullable = true)\n", - " |-- day_of_month: integer (nullable = true)\n", - " |-- hour: integer (nullable = true)\n", - " |-- day: integer (nullable = true)\n", - " |-- year: integer (nullable = true)\n", - "\n" - ] - } - ], - "source": [ - "df_all.printSchema()" - ] - }, - { - "cell_type": "markdown", - "id": "1b8404b4-c67e-4c8f-a699-40c6fef660e5", - "metadata": {}, - "source": [ - "## Next steps" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "41122437-d77b-4fa6-91f8-22126f875a52", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "access @ 
file:///home/conda/feedstock_root/build_artifacts/access_1696558639912/work\n", - "affine @ file:///home/conda/feedstock_root/build_artifacts/affine_1674245120525/work\n", - "aiohttp @ file:///home/conda/feedstock_root/build_artifacts/aiohttp_1696765416168/work\n", - "aiosignal @ file:///home/conda/feedstock_root/build_artifacts/aiosignal_1667935791922/work\n", - "alabaster @ file:///home/conda/feedstock_root/build_artifacts/alabaster_1673645646525/work\n", - "alembic @ file:///home/conda/feedstock_root/build_artifacts/alembic_1698347477885/work\n", - "amply @ file:///home/conda/feedstock_root/build_artifacts/amply_1687675480808/work\n", - "ansiwrap==0.8.4\n", - "anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1688651106312/work/dist\n", - "appdirs @ file:///home/conda/feedstock_root/build_artifacts/appdirs_1603108395799/work\n", - "argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1692818318753/work\n", - "argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1695386548039/work\n", - "arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1696128962909/work\n", - "astroid @ file:///home/conda/feedstock_root/build_artifacts/astroid_1697450283802/work\n", - "asttokens @ file:///home/conda/feedstock_root/build_artifacts/asttokens_1698341106958/work\n", - "async-generator==1.10\n", - "async-timeout @ file:///home/conda/feedstock_root/build_artifacts/async-timeout_1691763562544/work\n", - "atomicwrites @ file:///home/conda/feedstock_root/build_artifacts/atomicwrites_1657325823582/work\n", - "attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1683424013410/work\n", - "autopep8 @ file:///home/conda/feedstock_root/build_artifacts/autopep8_1615918605177/work\n", - "Babel @ file:///home/conda/feedstock_root/build_artifacts/babel_1698174530262/work\n", - "backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work\n", - 
"backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1687772187254/work\n", - "bcolz==1.2.1\n", - "beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1680888073205/work\n", - "binaryornot==0.4.4\n", - "black @ file:///home/conda/feedstock_root/build_artifacts/black-recipe_1622561163993/work\n", - "bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1696630167146/work\n", - "blinker @ file:///home/conda/feedstock_root/build_artifacts/blinker_1698890160476/work\n", - "bokeh @ file:///home/conda/feedstock_root/build_artifacts/bokeh_1652969564918/work\n", - "branca @ file:///home/conda/feedstock_root/build_artifacts/branca_1699295994965/work\n", - "Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1687884021435/work\n", - "brotlipy @ file:///home/conda/feedstock_root/build_artifacts/brotlipy_1695621656497/work\n", - "cachetools==4.2.4\n", - "certifi==2023.7.22\n", - "certipy==0.1.3\n", - "cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1696001773319/work\n", - "chardet @ file:///home/conda/feedstock_root/build_artifacts/chardet_1649184112677/work\n", - "charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1698833585322/work\n", - "click==7.1.2\n", - "click-plugins==1.1.1\n", - "cligj @ file:///home/conda/feedstock_root/build_artifacts/cligj_1633637764473/work\n", - "cloudpickle @ file:///home/conda/feedstock_root/build_artifacts/cloudpickle_1697464713350/work\n", - "colorama @ file:///home/conda/feedstock_root/build_artifacts/colorama_1666700638685/work\n", - "conda==4.9.2\n", - "conda-package-handling @ file:///home/conda/feedstock_root/build_artifacts/conda-package-handling_1691048088238/work\n", - "conda_package_streaming @ file:///home/conda/feedstock_root/build_artifacts/conda-package-streaming_1691009212940/work\n", - "confuse @ 
file:///home/conda/feedstock_root/build_artifacts/confuse_1680699073356/work\n", - "cookiecutter @ file:///home/conda/feedstock_root/build_artifacts/cookiecutter_1643669229020/work\n", - "coverage @ file:///home/conda/feedstock_root/build_artifacts/coverage_1696281775256/work\n", - "cryptography @ file:///home/conda/feedstock_root/build_artifacts/cryptography-split_1672672380968/work\n", - "cycler @ file:///home/conda/feedstock_root/build_artifacts/cycler_1696677705766/work\n", - "Cython @ file:///home/conda/feedstock_root/build_artifacts/cython_1695285659207/work\n", - "cytoolz @ file:///home/conda/feedstock_root/build_artifacts/cytoolz_1695545170008/work\n", - "dask @ file:///home/conda/feedstock_root/build_artifacts/dask-core_1607657054678/work\n", - "dataclasses @ file:///home/conda/feedstock_root/build_artifacts/dataclasses_1628958434797/work\n", - "debugpy @ file:///home/conda/feedstock_root/build_artifacts/debugpy_1695534280282/work\n", - "decorator @ file:///home/conda/feedstock_root/build_artifacts/decorator_1641555617451/work\n", - "defusedxml @ file:///home/conda/feedstock_root/build_artifacts/defusedxml_1615232257335/work\n", - "deprecation @ file:///home/conda/feedstock_root/build_artifacts/deprecation_1589881437857/work\n", - "descartes==1.1.0\n", - "diff-match-patch @ file:///home/conda/feedstock_root/build_artifacts/diff-match-patch_1683670697993/work\n", - "dill @ file:///home/conda/feedstock_root/build_artifacts/dill_1690101045195/work\n", - "distlib @ file:///home/conda/feedstock_root/build_artifacts/distlib_1689598491484/work\n", - "distributed @ file:///home/conda/feedstock_root/build_artifacts/distributed_1611361822694/work\n", - "docutils @ file:///home/conda/feedstock_root/build_artifacts/docutils_1695300443287/work\n", - "entrypoints @ file:///home/conda/feedstock_root/build_artifacts/entrypoints_1643888246732/work\n", - "esda @ file:///home/conda/feedstock_root/build_artifacts/esda_1660931045600/work\n", - "exceptiongroup @ 
file:///home/conda/feedstock_root/build_artifacts/exceptiongroup_1692026125334/work\n", - "executing @ file:///home/conda/feedstock_root/build_artifacts/executing_1698579936712/work\n", - "fastavro @ file:///home/conda/feedstock_root/build_artifacts/fastavro_1652900770404/work\n", - "fastjsonschema @ file:///home/conda/feedstock_root/build_artifacts/python-fastjsonschema_1696171779618/work/dist\n", - "fastparquet @ file:///home/conda/feedstock_root/build_artifacts/fastparquet_1619039245868/work\n", - "filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1698714947081/work\n", - "findspark @ file:///home/conda/feedstock_root/build_artifacts/findspark_1644599740637/work\n", - "Fiona @ file:///home/conda/feedstock_root/build_artifacts/fiona_1653911984590/work\n", - "flake8 @ file:///home/conda/feedstock_root/build_artifacts/flake8_1601874335748/work\n", - "folium @ file:///home/conda/feedstock_root/build_artifacts/folium_1699298670193/work\n", - "frozenlist @ file:///home/conda/feedstock_root/build_artifacts/frozenlist_1695377782835/work\n", - "fsspec @ file:///home/conda/feedstock_root/build_artifacts/fsspec_1618579848600/work\n", - "future @ file:///home/conda/feedstock_root/build_artifacts/future_1673596611778/work\n", - "gcsfs @ file:///home/conda/feedstock_root/build_artifacts/gcsfs_1618251324500/work\n", - "GDAL==3.5.0\n", - "geopandas @ file:///home/conda/feedstock_root/build_artifacts/geopandas_1686057576800/work\n", - "giddy @ file:///home/conda/feedstock_root/build_artifacts/giddy_1696344753517/work\n", - "gitdb @ file:///home/conda/feedstock_root/build_artifacts/gitdb_1697791558612/work\n", - "GitPython @ file:///home/conda/feedstock_root/build_artifacts/gitpython_1697650329377/work\n", - "gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1666808683138/work\n", - "google-api-core==1.34.0\n", - "google-auth==1.35.0\n", - "google-auth-oauthlib==0.5.3\n", - "google-cloud-bigquery==3.13.0\n", - 
"google-cloud-bigquery-storage==2.1.0\n", - "google-cloud-bigtable==1.6.1\n", - "google-cloud-container==2.3.1\n", - "google-cloud-core==2.3.3\n", - "google-cloud-datacatalog==3.0.0\n", - "google-cloud-dataproc==2.2.0\n", - "google-cloud-datastore==2.1.6\n", - "google-cloud-language==2.0.0\n", - "google-cloud-logging==2.1.1\n", - "google-cloud-monitoring==2.0.1\n", - "google-cloud-pubsub==2.2.0\n", - "google-cloud-redis==2.0.0\n", - "google-cloud-spanner==2.1.1\n", - "google-cloud-speech==2.0.1\n", - "google-cloud-storage==2.11.0\n", - "google-cloud-texttospeech==2.2.0\n", - "google-cloud-translate==3.0.2\n", - "google-cloud-vision==2.0.0\n", - "google-crc32c==1.5.0\n", - "google-resumable-media==2.6.0\n", - "googleapis-common-protos==1.61.0\n", - "googlemaps==4.10.0\n", - "graphframes==0.6\n", - "greenlet @ file:///home/conda/feedstock_root/build_artifacts/greenlet_1698243377683/work\n", - "grpc-google-iam-v1==0.12.7\n", - "grpcio==1.59.2\n", - "grpcio-status==1.48.2\n", - "htmlmin==0.1.12\n", - "httplib2 @ file:///home/conda/feedstock_root/build_artifacts/httplib2_1617134439639/work\n", - "idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1593328102638/work\n", - "imagecodecs @ file:///home/conda/feedstock_root/build_artifacts/imagecodecs_1662930206934/work\n", - "ImageHash @ file:///home/conda/feedstock_root/build_artifacts/imagehash_1664371213222/work\n", - "imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1696854106455/work\n", - "imagesize @ file:///home/conda/feedstock_root/build_artifacts/imagesize_1656939531508/work\n", - "importlib-metadata @ file:///home/conda/feedstock_root/build_artifacts/importlib-metadata_1688754491823/work\n", - "importlib-resources @ file:///home/conda/feedstock_root/build_artifacts/importlib_resources_1699364556997/work\n", - "inequality==1.0.0\n", - "inflection @ file:///home/conda/feedstock_root/build_artifacts/inflection_1598089801258/work\n", - "iniconfig @ 
file:///home/conda/feedstock_root/build_artifacts/iniconfig_1673103042956/work\n", - "intervaltree @ file:///home/conda/feedstock_root/build_artifacts/intervaltree_1683532206518/work\n", - "ipykernel @ file:///home/conda/feedstock_root/build_artifacts/ipykernel_1655241626755/work\n", - "ipyparallel @ file:///home/conda/feedstock_root/build_artifacts/ipyparallel_1607986704956/work\n", - "ipython @ file:///home/conda/feedstock_root/build_artifacts/ipython_1683289033986/work\n", - "ipython-genutils==0.2.0\n", - "ipython-sql @ file:///home/conda/feedstock_root/build_artifacts/ipython-sql_1602667917966/work\n", - "ipywidgets @ file:///home/conda/feedstock_root/build_artifacts/ipywidgets_1660942226216/work\n", - "isort @ file:///home/conda/feedstock_root/build_artifacts/isort_1675033873689/work\n", - "jaraco.classes @ file:///home/conda/feedstock_root/build_artifacts/jaraco.classes_1689112411129/work\n", - "jedi @ file:///home/conda/feedstock_root/build_artifacts/jedi_1635823949331/work\n", - "jeepney @ file:///home/conda/feedstock_root/build_artifacts/jeepney_1649085214306/work\n", - "Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/jinja2_1636510082894/work\n", - "jinja2-time @ file:///home/conda/feedstock_root/build_artifacts/jinja2-time_1646750632133/work\n", - "joblib @ file:///home/conda/feedstock_root/build_artifacts/joblib_1691577114857/work\n", - "json5 @ file:///home/conda/feedstock_root/build_artifacts/json5_1688248289187/work\n", - "jsonschema @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-meta_1698678498820/work\n", - "jsonschema-specifications @ file:///home/conda/feedstock_root/build_artifacts/jsonschema-specifications_1689701150890/work\n", - "jupyter-client @ file:///home/conda/feedstock_root/build_artifacts/jupyter_client_1649327809992/work\n", - "jupyter-contrib-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_core_1657548529421/work\n", - "jupyter-contrib-nbextensions @ 
file:///home/conda/feedstock_root/build_artifacts/jupyter_contrib_nbextensions_1602805456242/work\n", - "jupyter-core @ file:///home/conda/feedstock_root/build_artifacts/jupyter_core_1652365251650/work\n", - "# Editable install with no version control (jupyter-gcs-contents-manager==0.0.1)\n", - "-e /opt/dataproc/jupyter/jupyter-extensions-67f08e19469494ace1b953c515b09ae960c1a4ec/jupyter-gcs-contents-manager\n", - "jupyter-highlight-selected-word @ file:///home/conda/feedstock_root/build_artifacts/jupyter_highlight_selected_word_1638382841351/work\n", - "jupyter-http-over-ws @ file:///home/conda/feedstock_root/build_artifacts/jupyter_http_over_ws_1597332535364/work\n", - "jupyter-latex-envs @ file:///home/conda/feedstock_root/build_artifacts/jupyter_latex_envs_1614808832269/work\n", - "jupyter-nbextensions-configurator @ file:///home/conda/feedstock_root/build_artifacts/jupyter_nbextensions_configurator_1670793770953/work\n", - "jupyter-server @ file:///home/conda/feedstock_root/build_artifacts/jupyter_server_1647940913071/work\n", - "jupyter-server-mathjax @ file:///home/conda/feedstock_root/build_artifacts/jupyter-server-mathjax_1672324512570/work\n", - "jupyter-telemetry @ file:///home/conda/feedstock_root/build_artifacts/jupyter_telemetry_1605173804246/work\n", - "jupyterhub @ file:///home/conda/feedstock_root/build_artifacts/jupyterhub-feedstock_1614255305026/work\n", - "jupyterlab @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_1632809509349/work\n", - "jupyterlab-git @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab-git_1620032639379/work\n", - "jupyterlab-pygments @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_pygments_1649936611996/work\n", - "jupyterlab-widgets @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_widgets_1631590465624/work\n", - "jupyterlab_server @ file:///home/conda/feedstock_root/build_artifacts/jupyterlab_server_1671827361623/work\n", - "kaggle==1.5.16\n", - "keyring @ 
file:///home/conda/feedstock_root/build_artifacts/keyring_1696001522137/work\n", - "kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/kiwisolver_1695379923772/work\n", - "koalas @ file:///home/conda/feedstock_root/build_artifacts/koalas_1605320953654/work\n", - "libcst==1.1.0\n", - "libpysal @ file:///home/conda/feedstock_root/build_artifacts/libpysal_1668782270408/work\n", - "llvmlite==0.36.0\n", - "locket @ file:///home/conda/feedstock_root/build_artifacts/locket_1650660393415/work\n", - "lxml @ file:///home/conda/feedstock_root/build_artifacts/lxml_1649697664536/work\n", - "Mako @ file:///home/conda/feedstock_root/build_artifacts/mako_1699482234420/work\n", - "mamba @ file:///home/conda/feedstock_root/build_artifacts/mamba_1629310321864/work\n", - "mapclassify @ file:///home/conda/feedstock_root/build_artifacts/mapclassify_1673861555770/work\n", - "Markdown @ file:///home/conda/feedstock_root/build_artifacts/markdown_1651821407140/work\n", - "MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1695367437975/work\n", - "matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1632416634429/work\n", - "matplotlib-inline @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-inline_1660814786464/work\n", - "mccabe==0.6.1\n", - "metakernel @ file:///home/conda/feedstock_root/build_artifacts/metakernel_1648594625035/work\n", - "mgwr @ file:///home/conda/feedstock_root/build_artifacts/mgwr_1696605875605/work\n", - "missingno==0.4.2\n", - "mistune @ file:///home/conda/feedstock_root/build_artifacts/mistune_1635844675081/work\n", - "mock @ file:///home/conda/feedstock_root/build_artifacts/mock_1689092066756/work\n", - "more-itertools @ file:///home/conda/feedstock_root/build_artifacts/more-itertools_1691086935839/work\n", - "mpmath @ file:///home/conda/feedstock_root/build_artifacts/mpmath_1678228039184/work\n", - "msgpack @ 
file:///home/conda/feedstock_root/build_artifacts/msgpack-python_1695464102412/work\n", - "multidict @ file:///home/conda/feedstock_root/build_artifacts/multidict_1696716067907/work\n", - "munch @ file:///home/conda/feedstock_root/build_artifacts/munch_1688318326844/work\n", - "mypy-extensions @ file:///home/conda/feedstock_root/build_artifacts/mypy_extensions_1675543315189/work\n", - "nbclassic @ file:///home/conda/feedstock_root/build_artifacts/nbclassic_1682598306082/work\n", - "nbclient @ file:///home/conda/feedstock_root/build_artifacts/nbclient_1646999386773/work\n", - "nbconvert @ file:///home/conda/feedstock_root/build_artifacts/nbconvert_1605401836768/work\n", - "nbdime @ file:///home/conda/feedstock_root/build_artifacts/nbdime_1618448032595/work\n", - "nbformat @ file:///home/conda/feedstock_root/build_artifacts/nbformat_1690814868471/work\n", - "nest-asyncio @ file:///home/conda/feedstock_root/build_artifacts/nest-asyncio_1697083700168/work\n", - "networkx @ file:///home/conda/feedstock_root/build_artifacts/networkx_1680692919326/work\n", - "nltk @ file:///home/conda/feedstock_root/build_artifacts/nltk_1633093058893/work\n", - "nose==1.3.7\n", - "notebook @ file:///home/conda/feedstock_root/build_artifacts/notebook_1610575313697/work\n", - "notebook_shim @ file:///home/conda/feedstock_root/build_artifacts/notebook-shim_1682360583588/work\n", - "numba @ file:///home/conda/feedstock_root/build_artifacts/numba_1623568544775/work\n", - "numexpr @ file:///home/conda/feedstock_root/build_artifacts/numexpr_1658076426113/work\n", - "numpy @ file:///home/conda/feedstock_root/build_artifacts/numpy_1649281352817/work\n", - "numpydoc @ file:///home/conda/feedstock_root/build_artifacts/numpydoc_1665273484262/work\n", - "oauth2client==4.1.3\n", - "oauthlib @ file:///home/conda/feedstock_root/build_artifacts/oauthlib_1666056362788/work\n", - "opendatasets==0.1.22\n", - "packaging @ file:///home/conda/feedstock_root/build_artifacts/packaging_1696202382185/work\n", - 
"pamela @ file:///home/conda/feedstock_root/build_artifacts/pamela_1691565434937/work\n", - "pandas==1.2.5\n", - "pandas-profiling @ file:///home/conda/feedstock_root/build_artifacts/pandas-profiling_1613839428900/work\n", - "pandocfilters @ file:///home/conda/feedstock_root/build_artifacts/pandocfilters_1631603243851/work\n", - "papermill @ file:///home/conda/feedstock_root/build_artifacts/papermill_1604950649566/work\n", - "parso==0.7.0\n", - "partd @ file:///home/conda/feedstock_root/build_artifacts/partd_1695667515973/work\n", - "pathspec @ file:///home/conda/feedstock_root/build_artifacts/pathspec_1690597952537/work\n", - "patsy @ file:///home/conda/feedstock_root/build_artifacts/patsy_1665356157073/work\n", - "pexpect==4.8.0\n", - "phik @ file:///home/conda/feedstock_root/build_artifacts/phik_1697266240235/work\n", - "pickleshare @ file:///home/conda/feedstock_root/build_artifacts/pickleshare_1602535658641/work\n", - "Pillow @ file:///home/conda/feedstock_root/build_artifacts/pillow_1666920566244/work\n", - "pkgutil_resolve_name @ file:///home/conda/feedstock_root/build_artifacts/pkgutil-resolve-name_1694617248815/work\n", - "platformdirs @ file:///home/conda/feedstock_root/build_artifacts/platformdirs_1699715570510/work\n", - "pluggy @ file:///home/conda/feedstock_root/build_artifacts/pluggy_1693086607691/work\n", - "pointpats @ file:///home/conda/feedstock_root/build_artifacts/pointpats_1678201881705/work\n", - "pooch @ file:///home/conda/feedstock_root/build_artifacts/pooch_1698245576425/work\n", - "portalocker @ file:///home/conda/feedstock_root/build_artifacts/portalocker_1695662050140/work\n", - "poyo==0.5.0\n", - "prettytable @ file:///home/conda/feedstock_root/build_artifacts/prettytable_1694464263010/work\n", - "prometheus-client @ file:///home/conda/feedstock_root/build_artifacts/prometheus_client_1698692549203/work\n", - "prompt-toolkit @ file:///home/conda/feedstock_root/build_artifacts/prompt-toolkit_1699631011458/work\n", - 
"proto-plus==1.22.3\n", - "protobuf==3.20.3\n", - "psutil @ file:///home/conda/feedstock_root/build_artifacts/psutil_1695367190297/work\n", - "ptyprocess @ file:///home/conda/feedstock_root/build_artifacts/ptyprocess_1609419310487/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl\n", - "PuLP @ file:///home/conda/feedstock_root/build_artifacts/pulp_1695847465904/work\n", - "pure-eval @ file:///home/conda/feedstock_root/build_artifacts/pure_eval_1642875951954/work\n", - "pure-sasl @ file:///home/conda/feedstock_root/build_artifacts/pure-sasl_1631890804823/work\n", - "py4j==0.10.9\n", - "pyarrow==2.0.0\n", - "pyasn1 @ file:///home/conda/feedstock_root/build_artifacts/pyasn1_1694615621498/work\n", - "pyasn1-modules @ file:///home/conda/feedstock_root/build_artifacts/pyasn1-modules_1695107857548/work\n", - "pycodestyle @ file:///home/conda/feedstock_root/build_artifacts/pycodestyle_1589305246696/work\n", - "pycosat @ file:///home/conda/feedstock_root/build_artifacts/pycosat_1696355775111/work\n", - "pycparser @ file:///home/conda/feedstock_root/build_artifacts/pycparser_1636257122734/work\n", - "pycurl==7.45.1\n", - "pydocstyle @ file:///home/conda/feedstock_root/build_artifacts/pydocstyle_1673997487070/work\n", - "pydot @ file:///home/conda/feedstock_root/build_artifacts/pydot_1695469127091/work\n", - "pyflakes==2.2.0\n", - "Pygments @ file:///home/conda/feedstock_root/build_artifacts/pygments_1691408637400/work\n", - "PyHive @ file:///home/conda/feedstock_root/build_artifacts/pyhive_1646707521362/work\n", - "PyJWT @ file:///home/conda/feedstock_root/build_artifacts/pyjwt_1689721553971/work\n", - "pylint @ file:///home/conda/feedstock_root/build_artifacts/pylint_1698005019851/work\n", - "pyls-black @ file:///home/conda/feedstock_root/build_artifacts/pyls-black_1595615126037/work\n", - "pyls-spyder @ file:///home/conda/feedstock_root/build_artifacts/pyls-spyder_1613487177406/work\n", - "pyOpenSSL @ 
file:///home/conda/feedstock_root/build_artifacts/pyopenssl_1685514481738/work\n", - "pyparsing @ file:///home/conda/feedstock_root/build_artifacts/pyparsing_1635267989520/work\n", - "pyproj @ file:///home/conda/feedstock_root/build_artifacts/pyproj_1650803108421/work\n", - "PyQt5==5.12.3\n", - "PyQt5_sip==4.19.18\n", - "PyQtChart==5.12\n", - "PyQtWebEngine==5.12.1\n", - "pysal @ file:///home/conda/feedstock_root/build_artifacts/pysal_1612819487814/work\n", - "PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1661604839144/work\n", - "# Editable install with no version control (pyspark==3.1.3)\n", - "-e /usr/lib/spark/python\n", - "pytest @ file:///home/conda/feedstock_root/build_artifacts/pytest_1698233724984/work\n", - "pytest-cov @ file:///home/conda/feedstock_root/build_artifacts/pytest-cov_1684964868191/work\n", - "python-dateutil==2.8.0\n", - "python-json-logger @ file:///home/conda/feedstock_root/build_artifacts/python-json-logger_1677079630776/work\n", - "python-jsonrpc-server @ file:///home/conda/feedstock_root/build_artifacts/python-jsonrpc-server_1599827444631/work\n", - "python-language-server @ file:///home/conda/feedstock_root/build_artifacts/python-language-server_1607720213724/work\n", - "python-slugify @ file:///home/conda/feedstock_root/build_artifacts/python-slugify-split_1694282063120/work\n", - "pytoolconfig @ file:///home/conda/feedstock_root/build_artifacts/pytoolconfig_1675124745143/work\n", - "pytz @ file:///home/conda/feedstock_root/build_artifacts/pytz_1693930252784/work\n", - "pyu2f @ file:///home/conda/feedstock_root/build_artifacts/pyu2f_1604248910016/work\n", - "PyWavelets @ file:///home/conda/feedstock_root/build_artifacts/pywavelets_1649616412805/work\n", - "pyxdg @ file:///home/conda/feedstock_root/build_artifacts/pyxdg_1654536799286/work\n", - "PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1695373436676/work\n", - "pyzmq @ 
file:///home/conda/feedstock_root/build_artifacts/pyzmq_1698062423217/work\n", - "QDarkStyle @ file:///home/conda/feedstock_root/build_artifacts/qdarkstyle_1617328841504/work\n", - "qstylizer @ file:///home/conda/feedstock_root/build_artifacts/qstylizer_1662244505808/work/dist/qstylizer-0.2.2-py2.py3-none-any.whl\n", - "QtAwesome @ file:///home/conda/feedstock_root/build_artifacts/qtawesome_1678418951316/work\n", - "qtconsole @ file:///home/conda/feedstock_root/build_artifacts/qtconsole-base_1699244156891/work\n", - "QtPy @ file:///home/conda/feedstock_root/build_artifacts/qtpy_1698112029416/work\n", - "quantecon @ file:///home/conda/feedstock_root/build_artifacts/quantecon_1655746571862/work\n", - "rasterio @ file:///home/conda/feedstock_root/build_artifacts/rasterio_1655388667652/work\n", - "rasterstats @ file:///home/conda/feedstock_root/build_artifacts/rasterstats_1685447679213/work\n", - "referencing @ file:///home/conda/feedstock_root/build_artifacts/referencing_1691337268233/work\n", - "regex @ file:///home/conda/feedstock_root/build_artifacts/regex_1617644422046/work\n", - "requests @ file:///home/conda/feedstock_root/build_artifacts/requests_1608156231189/work\n", - "requests-oauthlib @ file:///home/conda/feedstock_root/build_artifacts/requests-oauthlib_1643557462909/work\n", - "rope @ file:///home/conda/feedstock_root/build_artifacts/rope_1699525256910/work\n", - "rpds-py @ file:///home/conda/feedstock_root/build_artifacts/rpds-py_1699109843138/work\n", - "rsa @ file:///home/conda/feedstock_root/build_artifacts/rsa_1658328885051/work\n", - "Rtree @ file:///home/conda/feedstock_root/build_artifacts/rtree_1637430736605/work\n", - "ruamel-yaml-conda @ file:///home/conda/feedstock_root/build_artifacts/ruamel_yaml_1695546195936/work\n", - "ruamel.yaml @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml_1699007344708/work\n", - "ruamel.yaml.clib @ file:///home/conda/feedstock_root/build_artifacts/ruamel.yaml.clib_1695996844669/work\n", - 
"scikit-image @ file:///home/conda/feedstock_root/build_artifacts/scikit-image_1638363134145/work\n", - "scikit-learn @ file:///home/conda/feedstock_root/build_artifacts/scikit-learn_1630910537183/work\n", - "scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy_1619561901336/work\n", - "seaborn @ file:///home/conda/feedstock_root/build_artifacts/seaborn-split_1629095986539/work\n", - "SecretStorage @ file:///home/conda/feedstock_root/build_artifacts/secretstorage_1695551746400/work\n", - "segregation @ file:///home/conda/feedstock_root/build_artifacts/segregation_1696427305843/work\n", - "Send2Trash @ file:///home/conda/feedstock_root/build_artifacts/send2trash_1682601222253/work\n", - "Shapely @ file:///home/conda/feedstock_root/build_artifacts/shapely_1651793098501/work\n", - "simplejson @ file:///home/conda/feedstock_root/build_artifacts/simplejson_1696595967770/work\n", - "six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work\n", - "smmap @ file:///home/conda/feedstock_root/build_artifacts/smmap_1634310307496/work\n", - "sniffio @ file:///home/conda/feedstock_root/build_artifacts/sniffio_1662051266223/work\n", - "snowballstemmer @ file:///home/conda/feedstock_root/build_artifacts/snowballstemmer_1637143057757/work\n", - "snuggs==1.4.7\n", - "sortedcontainers @ file:///home/conda/feedstock_root/build_artifacts/sortedcontainers_1621217038088/work\n", - "soupsieve @ file:///home/conda/feedstock_root/build_artifacts/soupsieve_1693929250441/work\n", - "spaghetti @ file:///home/conda/feedstock_root/build_artifacts/spaghetti_1696295637619/work\n", - "spark-nlp==4.4.0\n", - "spark-nlp-display==4.4\n", - "spglm @ file:///home/conda/feedstock_root/build_artifacts/spglm_1698250481025/work\n", - "Sphinx @ file:///home/conda/feedstock_root/build_artifacts/sphinx_1690955392406/work\n", - "sphinxcontrib-applehelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-applehelp_1674487779667/work\n", - 
"sphinxcontrib-devhelp==1.0.2\n", - "sphinxcontrib-htmlhelp @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-htmlhelp_1675256494457/work\n", - "sphinxcontrib-jsmath @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-jsmath_1691604704163/work\n", - "sphinxcontrib-qthelp==1.0.3\n", - "sphinxcontrib-serializinghtml @ file:///home/conda/feedstock_root/build_artifacts/sphinxcontrib-serializinghtml_1649380998999/work\n", - "spint @ file:///home/conda/feedstock_root/build_artifacts/spint_1696602360359/work\n", - "splot @ file:///home/conda/feedstock_root/build_artifacts/splot_1649898658322/work\n", - "spopt @ file:///home/conda/feedstock_root/build_artifacts/spopt_1655150061954/work\n", - "spreg @ file:///home/conda/feedstock_root/build_artifacts/spreg_1695792092600/work\n", - "spvcm @ file:///home/conda/feedstock_root/build_artifacts/spvcm_1696623913651/work\n", - "spyder @ file:///home/conda/feedstock_root/build_artifacts/spyder_1627140945937/work\n", - "spyder-kernels @ file:///home/conda/feedstock_root/build_artifacts/spyder-kernels_1625331173960/work\n", - "spylon==0.3.0\n", - "spylon-kernel==0.4.1\n", - "SQLAlchemy @ file:///home/conda/feedstock_root/build_artifacts/sqlalchemy_1697018588089/work\n", - "sqlparse @ file:///home/conda/feedstock_root/build_artifacts/sqlparse_1681817562700/work\n", - "stack-data @ file:///home/conda/feedstock_root/build_artifacts/stack_data_1669632077133/work\n", - "statsmodels @ file:///home/conda/feedstock_root/build_artifacts/statsmodels_1654787099639/work\n", - "svgwrite==1.4\n", - "sympy @ file:///home/conda/feedstock_root/build_artifacts/sympy_1618015367433/work\n", - "tables @ file:///home/conda/feedstock_root/build_artifacts/pytables_1638208858826/work\n", - "tangled-up-in-unicode @ file:///home/conda/feedstock_root/build_artifacts/tangled-up-in-unicode_1632832610704/work\n", - "tblib @ file:///home/conda/feedstock_root/build_artifacts/tblib_1694702375735/work\n", - "tenacity @ 
file:///home/conda/feedstock_root/build_artifacts/tenacity_1692026804430/work\n", - "terminado @ file:///home/conda/feedstock_root/build_artifacts/terminado_1699810101464/work\n", - "testpath @ file:///home/conda/feedstock_root/build_artifacts/testpath_1645693042223/work\n", - "text-unidecode @ file:///home/conda/feedstock_root/build_artifacts/text-unidecode_1694707102786/work\n", - "textdistance @ file:///home/conda/feedstock_root/build_artifacts/textdistance_1663527496115/work\n", - "textwrap3==0.9.2\n", - "threadpoolctl @ file:///home/conda/feedstock_root/build_artifacts/threadpoolctl_1689261241048/work\n", - "three-merge @ file:///home/conda/feedstock_root/build_artifacts/three-merge_1595515817927/work\n", - "thrift @ file:///home/conda/feedstock_root/build_artifacts/thrift_1695546065194/work/lib/py\n", - "thrift-sasl @ file:///home/conda/feedstock_root/build_artifacts/thrift_sasl_1631824374965/work\n", - "tifffile @ file:///home/conda/feedstock_root/build_artifacts/tifffile_1665588749940/work\n", - "tinycss2 @ file:///home/conda/feedstock_root/build_artifacts/tinycss2_1666100256010/work\n", - "tobler @ file:///home/conda/feedstock_root/build_artifacts/tobler_1696384105449/work\n", - "toml @ file:///home/conda/feedstock_root/build_artifacts/toml_1604308577558/work\n", - "tomli @ file:///home/conda/feedstock_root/build_artifacts/tomli_1644342247877/work\n", - "tomlkit @ file:///home/conda/feedstock_root/build_artifacts/tomlkit_1698950496895/work\n", - "toolz @ file:///home/conda/feedstock_root/build_artifacts/toolz_1657485559105/work\n", - "tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1648827257044/work\n", - "tqdm @ file:///home/conda/feedstock_root/build_artifacts/tqdm_1691671248568/work\n", - "traitlets @ file:///home/conda/feedstock_root/build_artifacts/traitlets_1675110562325/work\n", - "typed-ast @ file:///home/conda/feedstock_root/build_artifacts/typed-ast_1695409893559/work\n", - "types-python-dateutil @ 
file:///home/conda/feedstock_root/build_artifacts/types-python-dateutil_1689882883784/work\n", - "typing-inspect==0.9.0\n", - "typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/typing_extensions_1695040754690/work\n", - "ujson @ file:///home/conda/feedstock_root/build_artifacts/ujson_1695472604200/work\n", - "uritemplate==3.0.1\n", - "urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1603125704209/work\n", - "virtualenv @ file:///home/conda/feedstock_root/build_artifacts/virtualenv_1643238754089/work\n", - "visions @ file:///home/conda/feedstock_root/build_artifacts/visions_1600915384170/work\n", - "watchdog @ file:///home/conda/feedstock_root/build_artifacts/watchdog_1695395257294/work\n", - "wcwidth @ file:///home/conda/feedstock_root/build_artifacts/wcwidth_1698744702785/work\n", - "webencodings @ file:///home/conda/feedstock_root/build_artifacts/webencodings_1694681268211/work\n", - "websocket-client @ file:///home/conda/feedstock_root/build_artifacts/websocket-client_1696770128353/work\n", - "widgetsnbextension @ file:///home/conda/feedstock_root/build_artifacts/widgetsnbextension_1637174139311/work\n", - "wurlitzer @ file:///home/conda/feedstock_root/build_artifacts/wurlitzer_1669944596833/work\n", - "xyzservices @ file:///home/conda/feedstock_root/build_artifacts/xyzservices_1698325309404/work\n", - "yapf @ file:///home/conda/feedstock_root/build_artifacts/yapf_1690387939953/work\n", - "yarl @ file:///home/conda/feedstock_root/build_artifacts/yarl_1696732512110/work\n", - "zict @ file:///home/conda/feedstock_root/build_artifacts/zict_1681770155528/work\n", - "zipp @ file:///home/conda/feedstock_root/build_artifacts/zipp_1695255097490/work\n", - "zstandard==0.22.0\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], - "source": [ - "# Check packages:\n", - "%pip freeze" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23ca0926-4afa-4a2a-9fa0-2eb77f6dcce6", - 
"metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.15" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}