diff --git a/.gitignore b/.gitignore index 4ea83cbf7e12..f402cb1e83e0 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,5 @@ dist/ # For Hive metastore_db/ + +.ipynb_checkpoints diff --git a/tools/workload/benchmark_velox/README.md b/tools/workload/benchmark_velox/README.md index 5f080077f606..b1e7a2f795b8 100644 --- a/tools/workload/benchmark_velox/README.md +++ b/tools/workload/benchmark_velox/README.md @@ -36,3 +36,30 @@ We also provide a script [run_tpc_workload.sh](./run_tpc_workload.sh). This scri ## Analyzing Performance Results You can check the **Show Performance** section in the output notebook after execution. It shows the cpu% per query, and draws charts for the cpu%, memory throughput, disk throughput/util%, network throughput and pagefaults. + +## Set up Performance Analysis Tools + +Please check the **Set up perf analysis tools (optional)** section in [initialize.ipynb](./initialize.ipynb) to set up the environment required for running the performance analysis scripts. Once the setup is complete, update the following variables in your YAML file (as documented in [params.yaml.template](./params.yaml.template)) before running the TPC-H/TPC-DS benchmarks: + +- server: Hostname or IP address of the perf analysis server. It must be reachable via ssh. Can be localhost if you deploy the perf analysis scripts on the local cluster. +- base_dir: The directory to use on the perf analysis server. Usually a codename for this run. +- analyze_perf: Whether to upload the profile to the perf analysis server and run the perf analysis scripts. Only takes effect if server is set; set it to `True` in that case. +- proxy: Proxy used to connect to the perf analysis server. Only needed if the server is accessed via a proxy. + +After the workload completes, the tool generates a notebook, executes it automatically, and saves the output notebook in the `$HOME/PAUS/base_dir` directory with a suffix of `[APPLICATION ID].nbconvert.ipynb`. 
Additionally, the output notebook is converted to HTML for improved readability; the HTML file keeps the same base filename and is stored in the `html` sub-folder. + +A sample generated notebook for TPC-H Q1 and its corresponding HTML file are available for reference: +- Notebook: [tpch_q1.nbconvert.ipynb](./sample/tpch_q1.nbconvert.ipynb) +- HTML file: [tpch_q1.html](./sample/tpch_q1.html) + +The notebook also produces a trace-viewer JSON file for analyzing workload statistics. It includes SAR metrics and stage/task-level breakdowns. Using this tool, users can compare statistics across stages and queries, identify performance bottlenecks, and target specific stages for optimization. + +You can explore the sample trace-viewer JSON file using the Google Chrome browser. To do so: + +1. Download the sample file [trace_result_tpch_q1.json](./sample/trace_result_tpch_q1.json). +2. Launch Google Chrome. In the address bar, enter "chrome://tracing/". +3. Use the "Load" button to load the sample JSON file. + +You can then inspect the sample trace data interactively. 
+ +![trace-result-tpch-q1](./sample/Trace-viewer.png) diff --git a/tools/workload/benchmark_velox/analysis/perf_analysis_template.ipynb b/tools/workload/benchmark_velox/analysis/perf_analysis_template.ipynb new file mode 100644 index 000000000000..8682b209d5f1 --- /dev/null +++ b/tools/workload/benchmark_velox/analysis/perf_analysis_template.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "appid=''\n", + "disk=''\n", + "nic=''\n", + "tz=''\n", + "base_dir=''\n", + "name=''\n", + "notebook=''\n", + "notebook_html=''\n", + "proxy=''\n", + "emails=''\n", + "pr=''\n", + "\n", + "comp_appid=''\n", + "comp_base_dir=''\n", + "comp_name=''\n", + "\n", + "baseline_appid=''\n", + "baseline_base_dir=''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import findspark\n", + "findspark.init()\n", + "\n", + "import os\n", + "import time\n", + "import sys\n", + "from pyspark import SparkConf, SparkContext\n", + "from pyspark.sql import SQLContext\n", + "\n", + "def get_py4jzip():\n", + " spark_home=os.environ['SPARK_HOME']\n", + " py4jzip = !ls {spark_home}/python/lib/py4j*.zip\n", + " return py4jzip[0]\n", + "\n", + "conf = (SparkConf()\n", + " .set('spark.app.name', f'perf_analysis_{appid}')\n", + " .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\n", + " .set('spark.executor.instances', '4')\n", + " .set('spark.executor.cores','4')\n", + " .set('spark.executor.memory', '8g')\n", + " .set('spark.driver.memory','20g')\n", + " 
.set('spark.memory.offHeap.enabled','True')\n", + " .set('spark.memory.offHeap.size','20g')\n", + " .set('spark.executor.memoryOverhead','1g')\n", + " .set('spark.executor.extraJavaOptions',\n", + " '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')\n", + " .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}\")\n", + " .set('spark.sql.inMemoryColumnarStorage.compressed','False')\n", + " .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')\n", + " .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')\n", + " .set('spark.sql.execution.arrow.pyspark.enabled','True')\n", + " .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')\n", + " .set(\"spark.sql.repl.eagerEval.enabled\", True)\n", + " .set(\"spark.sql.legacy.timeParserPolicy\",\"LEGACY\") \n", + " .set(\"spark.sql.session.timeZone\", tz)\n", + " )\n", + "\n", + "sc = SparkContext(conf=conf,master='yarn')\n", + "sc.setLogLevel(\"ERROR\")\n", + "spark = SQLContext(sc)\n", + "time.sleep(10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "%run ~/PAUS/sparklog.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"https_proxy\"] = proxy\n", + "os.environ[\"http_proxy\"] = proxy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "emonmetric=['emon_cpuutil',\n", + " 'emon_cpufreq',\n", + " 'emon_instr_retired',\n", + " 'emon_ipc']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "disk_prefix=[f\"'{dev}'\" for dev in disk.split(',')]\n", + "nic_prefix=[f\"'{dev}'\" for dev in nic.split(',')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, 
+ "outputs": [], + "source": [ + "display(HTML(' 1 App info'))\n", + "display(HTML(f\" 2 Compare to {comp_name}\"))\n", + "display(HTML(' 3 Config compare'))\n", + "display(HTML(' 4 Compare to baseline'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# App info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "app=Application_Run(appid, basedir=base_dir)\n", + "appals=app.analysis['app']['als']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stats=appals.get_basic_state()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "summary=app.get_summary(show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n", + "display(summary.style)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "traceview=app.generate_trace_view(showemon=True,show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "appals.get_app_name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if 'gluten' in name:\n", + " shuffle_df, dfx=appals.get_shuffle_stat()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "appals.show_critical_path_time_breakdown().T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if emails:\n", + " mail_list=' 
'.join(emails.split(','))\n", + " body,title=generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr)\n", + " !mail -a \"Content-type: text/html; charset=utf-8\" -s \"$title\" $mail_list < $body" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare to" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if comp_appid:\n", + " comp_app=Application_Run(comp_appid,basedir=comp_base_dir)\n", + " output=app.compare_app(rapp=comp_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n", + " display(HTML(output))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Config compare" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if comp_appid:\n", + " comp_appals=comp_app.analysis['app']['als']\n", + " display(comp_spark_conf(appals, comp_appals))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Compare to baseline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "if baseline_appid:\n", + " baseline_app=Application_Run(baseline_appid,basedir=baseline_base_dir)\n", + " output=app.compare_app(rapp=baseline_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n", + " display(HTML(output))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Convert to HTML" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%javascript\n", + "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + 
"outputs": [], + "source": [ + "# htmlname=nb_name.replace(\"ipynb\",\"html\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "nbTranslate": { + "displayLangs": [ + "*" + ], + "hotkey": "alt-t", + "langInMainMenu": true, + "sourceLang": "en", + "targetLang": "fr", + "useGoogleTranslate": true + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "197px", + "left": "2188px", + "top": "111px", + "width": "269px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tools/workload/benchmark_velox/analysis/requirements.txt 
b/tools/workload/benchmark_velox/analysis/requirements.txt new file mode 100644 index 000000000000..f230f8aa99de --- /dev/null +++ b/tools/workload/benchmark_velox/analysis/requirements.txt @@ -0,0 +1,174 @@ +ansicolors==1.1.8 +anyio==4.4.0 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +arrow==1.3.0 +asttokens==2.4.1 +attrs==23.2.0 +Automat==20.2.0 +Babel==2.8.0 +bcrypt==3.2.0 +beautifulsoup4==4.12.3 +black==24.4.2 +bleach==6.1.0 +blinker==1.4 +certifi==2020.6.20 +cffi==1.16.0 +chardet==4.0.0 +charset-normalizer==3.4.0 +click==8.0.3 +colorama==0.4.4 +comm==0.2.2 +configobj==5.0.6 +constantly==15.1.0 +contourpy==1.2.1 +cryptography==3.4.8 +cycler==0.12.1 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distro==1.7.0 +entrypoints==0.4 +exceptiongroup==1.2.1 +executing==2.0.1 +fastjsonschema==2.19.1 +findspark==2.0.1 +fire==0.7.0 +fonttools==4.52.4 +fqdn==1.5.1 +gitdb==4.0.11 +GitPython==3.1.43 +greenlet==3.0.3 +httplib2==0.20.2 +hyperlink==21.0.0 +idna==3.10 +importlib-metadata==4.6.4 +incremental==21.3.0 +ipykernel==6.29.4 +ipython==8.24.0 +ipython-genutils==0.2.0 +ipywidgets==8.1.3 +isoduration==20.11.0 +jedi==0.19.1 +jeepney==0.7.1 +Jinja2==3.0.3 +jsonpatch==1.32 +jsonpointer==2.0 +jsonschema==4.22.0 +jsonschema-specifications==2023.12.1 +jupyter_client==7.4.9 +jupyter_contrib_core==0.4.2 +jupyter_contrib_nbextensions==0.7.0 +jupyter_core==5.7.2 +jupyter-events==0.10.0 +jupyter-highlight-selected-word==0.2.0 +jupyter-nbextensions-configurator==0.6.3 +jupyter_server==2.14.0 +jupyter-server-mathjax==0.2.6 +jupyter_server_terminals==0.5.3 +jupyterlab_pygments==0.3.0 +jupyterlab_widgets==3.0.11 +keyring==23.5.0 +kiwisolver==1.4.5 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==5.2.2 +MarkupSafe==2.0.1 +matplotlib==3.5.2 +matplotlib-inline==0.1.7 +metakernel==0.30.2 +mistune==3.0.2 +more-itertools==8.10.0 +mypy-extensions==1.0.0 +nbclassic==1.1.0 +nbclient==0.10.0 +nbconvert==7.16.4 +nbdime==4.0.1 +nbformat==5.10.4 
+nest-asyncio==1.6.0 +netifaces==0.11.0 +notebook==6.5.6 +notebook_shim==0.2.4 +NotebookScripter==6.0.0 +numpy==1.26.4 +oauthlib==3.2.0 +overrides==7.7.0 +packaging==24.0 +pandas==1.5.3 +pandasql==0.7.3 +pandocfilters==1.5.1 +papermill==2.6.0 +parso==0.8.4 +pathspec==0.12.1 +pexpect==4.8.0 +pillow==10.3.0 +pip==24.2 +platformdirs==4.2.2 +prometheus_client==0.20.0 +prompt_toolkit==3.0.45 +psutil==5.9.8 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==16.1.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.1 +pycparser==2.22 +Pygments==2.11.2 +PyHamcrest==2.0.2 +PyHDFS==0.3.1 +PyJWT==2.3.0 +pyOpenSSL==21.0.0 +pyparsing==2.4.7 +pyrsistent==0.18.1 +pyserial==3.5 +pyspark==3.3.1 +python-dateutil==2.9.0.post0 +python-json-logger==2.0.7 +pytz==2022.1 +PyYAML==6.0.2 +pyzmq==24.0.1 +referencing==0.35.1 +requests==2.32.3 +rfc3339-validator==0.1.4 +rfc3986-validator==0.1.1 +rfc3987==1.3.8 +rpds-py==0.18.1 +seaborn==0.13.2 +SecretStorage==3.3.1 +Send2Trash==1.8.3 +service-identity==18.1.0 +setuptools==75.1.0 +simplejson==3.19.2 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.1 +soupsieve==2.5 +spylon==0.3.0 +spylon-kernel==0.4.1 +SQLAlchemy==1.4.46 +ssh-import-id==5.11 +stack-data==0.6.3 +tenacity==8.3.0 +termcolor==2.5.0 +terminado==0.18.1 +tinycss2==1.3.0 +tomli==2.0.1 +tornado==6.4 +tqdm==4.66.4 +traitlets==5.14.3 +Twisted==22.1.0 +types-python-dateutil==2.9.0.20240316 +typing_extensions==4.12.0 +tzdata==2024.1 +uri-template==1.3.0 +urllib3==1.26.5 +wadllib==1.3.6 +wcwidth==0.2.13 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.8.0 +wheel==0.44.0 +widgetsnbextension==4.0.11 +zipp==1.0.0 +zope.interface==5.4.0 diff --git a/tools/workload/benchmark_velox/analysis/run_perf_analysis.sh b/tools/workload/benchmark_velox/analysis/run_perf_analysis.sh new file mode 100755 index 000000000000..af30250d4812 --- /dev/null +++ b/tools/workload/benchmark_velox/analysis/run_perf_analysis.sh @@ -0,0 +1,180 @@ +#! 
/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -x + +SCRIPT_LOCATION=$(dirname $0) +PAUS=$HOME/PAUS + +while [[ $# -gt 0 ]]; do + case $1 in + --base-dir) + BASEDIR="$2" + shift # past argument + shift # past value + ;; + --name) + NAME="$2" + shift # past argument + shift # past value + ;; + --appid) + APPID="$2" + shift # past argument + shift # past value + ;; + --pr) + PR="$2" + shift # past argument + shift # past value + ;; + --disk) + DISK="$2" + shift # past argument + shift # past value + ;; + --nic) + NIC="$2" + shift # past argument + shift # past value + ;; + --tz) + SPARK_TZ="$2" + shift # past argument + shift # past value + ;; + --proxy) + PROXY="$2" + shift # past argument + shift # past value + ;; + --emails) + EMAILS="$2" + shift # past argument + shift # past value + ;; + --comp-appid) + COMP_APPID="$2" + shift # past argument + shift # past value + ;; + --comp-base-dir) + COMP_BASEDIR="$2" + shift # past argument + shift # past value + ;; + --comp-name) + COMP_NAME="$2" + shift # past argument + shift # past value + ;; + --baseline-appid) + BASELINE_APPID="$2" + shift # past argument + shift # past value + ;; + --baseline-base-dir) + BASELINE_BASEDIR="$2" + shift # past argument + shift # past value 
+ ;; + *) + echo "Error: Unknown argument: $1" + exit 1 + ;; + esac +done + +# Validation: fail if any required argument is unset OR empty (":-" covers both; "+x" only detects unset) +if [[ -z "${BASEDIR:-}" || -z "${NAME:-}" || -z "${APPID:-}" || -z "${DISK:-}" || -z "${NIC:-}" || -z "${SPARK_TZ:-}" ]]; then + echo "Error: One or more required arguments are missing or empty." + exit 1 +fi + +mkdir -p $PAUS +if [ ! -f "$PAUS/perf_analysis_template.ipynb" ]; then + cp $SCRIPT_LOCATION/perf_analysis_template.ipynb $PAUS/ +fi +if [ ! -f "$PAUS/sparklog.ipynb" ]; then + cp $SCRIPT_LOCATION/sparklog.ipynb $PAUS/ +fi + +workdir=$PAUS/$BASEDIR +mkdir -p $workdir +mkdir -p $workdir/html + +nb_name0=${NAME}_${APPID} +nb_name=${nb_name0}.ipynb + +# Upload eventlog +cp -f $PAUS/perf_analysis_template.ipynb $workdir/$nb_name +hdfs dfs -mkdir -p /history +hdfs dfs -ls /history/$APPID >/dev/null 2>&1 || { hdfs dfs -cp /$BASEDIR/$APPID/app.log /history/$APPID || exit 1; } + +EXTRA_ARGS="" +if [ -v COMP_APPID ] +then + if [[ -z "${COMP_BASEDIR:-}" || -z "${COMP_NAME:-}" ]]; then + echo "Missing --comp-base-dir or --comp-name" + exit 1 + fi + hdfs dfs -ls /history/$COMP_APPID >/dev/null 2>&1 || { hdfs dfs -cp /$COMP_BASEDIR/$COMP_APPID/app.log /history/$COMP_APPID || exit 1; } + EXTRA_ARGS=$EXTRA_ARGS" -r comp_appid $COMP_APPID -r comp_base_dir $COMP_BASEDIR -r comp_name $COMP_NAME" + sed -i "s/# Compare to\"/# Compare to $COMP_NAME\"/g" $workdir/$nb_name +fi +if [ -v BASELINE_APPID ] +then + if [[ -z "${BASELINE_BASEDIR:-}" ]]; then + echo "Missing --baseline-base-dir" + exit 1 + fi + hdfs dfs -ls /history/$BASELINE_APPID >/dev/null 2>&1 || { hdfs dfs -cp /$BASELINE_BASEDIR/$BASELINE_APPID/app.log /history/$BASELINE_APPID || exit 1; } + EXTRA_ARGS=$EXTRA_ARGS" -r baseline_appid $BASELINE_APPID -r baseline_base_dir $BASELINE_BASEDIR" +fi + + +if [ -n "${PR}" ] +then + EXTRA_ARGS=$EXTRA_ARGS" -r pr $PR" +fi + +if [ -n "${PROXY}" ] +then + EXTRA_ARGS=$EXTRA_ARGS" -r proxy $PROXY" +fi + +if [ -n "${EMAILS}" ] +then + 
EXTRA_ARGS=$EXTRA_ARGS" -r emails $EMAILS" +fi + +source ~/paus-env/bin/activate + +notebook_html=html/${nb_name0}.html + +papermill --cwd $workdir \ + -r appid $APPID \ + -r disk $DISK \ + -r nic $NIC \ + -r tz $SPARK_TZ \ + -r base_dir $BASEDIR \ + -r name $NAME \ + -r notebook $nb_name \ + -r notebook_html $notebook_html \ + $EXTRA_ARGS $workdir/$nb_name $workdir/$nb_name + +jupyter nbconvert --to html --no-input $workdir/$nb_name --output $workdir/$notebook_html --template classic > /dev/null 2>&1 diff --git a/tools/workload/benchmark_velox/analysis/sparklog.ipynb b/tools/workload/benchmark_velox/analysis/sparklog.ipynb new file mode 100644 index 000000000000..79713c8d1be3 --- /dev/null +++ b/tools/workload/benchmark_velox/analysis/sparklog.ipynb @@ -0,0 +1,6137 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# initialize" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "from __future__ import nested_scopes\n", + "from IPython.display import display, HTML\n", + "display(HTML(''))\n", + "display(HTML(''))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true, + "lang": "en" + }, + "outputs": [], + "source": [ + "import logging\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.ERROR)\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import os\n", + "import datetime\n", + "from datetime import date\n", + "import time\n", + "import threading\n", + "import gzip\n", + "import json\n", + "import math\n", + "import re\n", + "import html\n", + "import builtins\n", + "\n", + "import collections\n", + "import numpy\n", + "import pandas\n", + "pandas.options.display.max_rows=50\n", + 
"pandas.options.display.max_columns=200\n", + "pandas.options.display.float_format = '{:,}'.format\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.ticker as mtick\n", + "import matplotlib.lines as mlines\n", + "from matplotlib import colors\n", + "from matplotlib import rcParams\n", + "rcParams['font.sans-serif'] = 'Courier New'\n", + "rcParams['font.family'] = 'Courier New'\n", + "rcParams['font.size'] = '12'\n", + "%matplotlib inline\n", + "\n", + "from ipywidgets import IntProgress,Layout\n", + "\n", + "import pyspark\n", + "import pyspark.sql\n", + "import pyspark.sql.functions as F\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.functions import to_date, floor, lit, rank, col, lag, when, pandas_udf, PandasUDFType, avg, sum as _sum\n", + "from pyspark.sql.window import Window\n", + "from pyspark.sql.types import *\n", + "from pyspark.ml import Pipeline\n", + "from pyspark.ml.feature import StringIndexer, VectorAssembler\n", + "from pyspark.ml.clustering import KMeans\n", + "from pyspark.storagelevel import StorageLevel\n", + "\n", + "import seaborn as sns\n", + "from functools import reduce\n", + "from pandasql import sqldf\n", + "from itertools import chain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "import pyhdfs\n", + "import socket\n", + "localhost=socket.gethostname()\n", + "local_ip=socket.gethostbyname(localhost)\n", + "\n", + "fs = pyhdfs.HdfsClient(hosts=f'{local_ip}:9870', user_name='sparkuser')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# fs functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def getexecutor_stat(pdir):\n", + " appfolder=fs.list_status(pdir)\n", + " total_rchar=0\n", + " total_wchar=0\n", + " total_read_bytes=0\n", + " 
total_write_bytes=0\n", + " total_cancelled_write_bytes=0\n", + "\n", + " for t in appfolder:\n", + " if t['type']=='DIRECTORY' and t['pathSuffix']!=\"summary.parquet\":\n", + " cdir=pdir+t['pathSuffix']\n", + " for cntfile in fs.listdir(cdir):\n", + " if cntfile.endswith(\".stat\"):\n", + " with fs.open(cdir+\"/\"+cntfile) as f:\n", + " cnt=f.readlines()\n", + " rchar=0\n", + " wchar=0\n", + " read_bytes=0\n", + " write_bytes=0\n", + " cancelled_write_bytes=0\n", + " for c in cnt:\n", + " c=c.decode('ascii')\n", + " if c.startswith(\"rchar\"):\n", + " v=int(c.split(\" \")[-1])\n", + " rchar=v-rchar\n", + " elif c.startswith(\"wchar\"):\n", + " v=int(c.split(\" \")[-1])\n", + " wchar=v-wchar\n", + " elif c.startswith(\"read_bytes\"):\n", + " v=int(c.split(\" \")[-1])\n", + " read_bytes=v-read_bytes\n", + " elif c.startswith(\"write_bytes\"):\n", + " v=int(c.split(\" \")[-1])\n", + " write_bytes=v-write_bytes\n", + " elif c.startswith(\"cancelled_write_bytes\"):\n", + " v=int(c.split(\" \")[-1])\n", + " cancelled_write_bytes=v-cancelled_write_bytes\n", + " total_rchar+=rchar/1024/1024\n", + " total_wchar+=wchar/1024/1024\n", + " total_read_bytes+=read_bytes/1024/1024\n", + " total_write_bytes+=write_bytes/1024/1024\n", + " total_cancelled_write_bytes+=cancelled_write_bytes/1024/1024\n", + " return (total_rchar,total_wchar,total_read_bytes,total_write_bytes,total_cancelled_write_bytes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def background_gradient(s, m, M, cmap='PuBu', low=0, high=0):\n", + " from matplotlib import colors\n", + " rng = M - m\n", + " norm = colors.Normalize(m - (rng * low),\n", + " M + (rng * high))\n", + " normed = norm(s.values)\n", + " c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n", + " return ['background-color: {:s}'.format(color) for color in c]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": "true", + 
"heading_collapsed": true + }, + "source": [ + "# base class" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "class SparkLog_Analysis:\n", + " def __init__(self, appid,jobids,clients):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "class Analysis:\n", + " def __init__(self,file):\n", + " self.file=file\n", + " self.starttime=0\n", + " self.df=None\n", + " \n", + " def load_data(self):\n", + " pass\n", + " \n", + " def generate_trace_view_list(self,id=0, **kwargs):\n", + " if self.df is None: # identity check: load lazily on first use; '==' is element-wise on a pandas DataFrame\n", + " self.load_data()\n", + " trace_events=[]\n", + " node=kwargs.get('node',\"node\")\n", + " trace_events.append(json.dumps({\"name\": \"process_name\",\"ph\": \"M\",\"pid\":id,\"tid\":0,\"args\":{\"name\":\" \"+node}}))\n", + " return trace_events\n", + " \n", + " def generate_trace_view(self, trace_output, **kwargs):\n", + " traces=[]\n", + " traces.extend(self.generate_trace_view_list(0,**kwargs))\n", + " \n", + " output='''\n", + " {\n", + " \"traceEvents\": [\n", + " \n", + " ''' + \\\n", + " \",\\n\".join(traces)\\\n", + " + '''\n", + " ],\n", + " \"displayTimeUnit\": \"ns\"\n", + " }'''\n", + "\n", + " if(\"home\" in trace_output):\n", + " outputfolder=trace_output\n", + " appidx=trace_output.split(\"/\")[-1]\n", + " else:\n", + " outputfolder='/home/sparkuser/trace_result/'+trace_output+'.json'\n", + " appidx=trace_output\n", + " with open(outputfolder, 'w') as outfile: \n", + " outfile.write(output)\n", + " \n", + " traceview_link=f'http://{localhost}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{appidx}.json'\n", + " display(HTML(f\"{traceview_link}\"))\n", + " return traceview_link" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# EMON process" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def get_alias_name(metric,func):\n", + " return metric+\"_\"+func.__name__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def splits_fill0(x):\n", + " fi=[]\n", + " for l in x:\n", + " li=re.split(r'\\s+',l.strip())\n", + " li=[l.replace(\",\",\"\") for l in li]\n", + " for j in range(len(li),192*4+5):\n", + " li.append('0')\n", + " fi.append(li)\n", + " return iter(fi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def background_gradient(s, m, M, cmap='PuBu', low=0, high=0):\n", + " from matplotlib import colors\n", + " rng = M - m\n", + " norm = colors.Normalize(m - (rng * low),\n", + " M + (rng * high))\n", + " normed = norm(s.values)\n", + " c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n", + " return ['background-color: {:s}'.format(color) for color in c]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "class Emon_Analysis(Analysis):\n", + " def __init__(self,emon_file):\n", + " Analysis.__init__(self,emon_file)\n", + " \n", + " paths=os.path.split(self.file)\n", + " if fs.exists(paths[0]+\"/emonv.txt\"):\n", + " self.totalcores=0\n", + " self.numberofpackages=0\n", + " self.coresperpackage=0\n", + " self.threadsperpackage=0\n", + " self.tsc=0\n", + " self.unc_cha_cnt=0\n", + " self.unc_mdf_cnt=0\n", + " self.unc_imc_cnt=0\n", + " self.unc_cxlcm_cnt=0\n", + " self.unc_cxldp_cnt=0\n", + " self.unc_mchbm_cnt=0\n", + " self.unc_m2hbm_cnt=0\n", + " self.unc_pmem_fc_cnt=0\n", + " self.unc_pmem_mc_cnt=0\n", + " self.unc_m2m_cnt=0\n", + " self.unc_qpi_cnt=0\n", + " self.unc_r3qpi_cnt=0\n", + " self.unc_iio_cnt=0\n", + " self.unc_irp_cnt=0\n", + " self.unc_pcu_cnt=0\n", + 
" self.unc_ubox_cnt=0\n", + " self.unc_m2pcie_cnt=0\n", + " self.unc_rdt_cnt=0\n", + " with fs.open(paths[0]+\"/emonv.txt\") as f:\n", + " allcnt = f.read().decode('ascii')\n", + " for l in allcnt.split(\"\\n\"):\n", + " if l.startswith(\"number_of_online_processors\"):\n", + " self.totalcores=int(re.split(\" +\",l)[2])\n", + " elif re.search(\"Number of Packages: +(\\d+)\",l):\n", + " self.numberofpackages=int(re.search(\"Number of Packages: +(\\d+)\",l).group(1))\n", + " elif re.search(\"Cores Per Package: +(\\d+)\",l):\n", + " self.coresperpackage=int(re.search(\"Cores Per Package: +(\\d+)\",l).group(1))\n", + " elif re.search(\"Threads Per Package: +(\\d+)\",l):\n", + " self.threadsperpackage=int(re.search(\"Threads Per Package: +(\\d+)\",l).group(1))\n", + " elif re.search(\"TSC Freq +[.]+ +([0-9.]+)\",l):\n", + " self.tsc=int(float(re.search(\"TSC Freq +[.]+ +([0-9.]+)\",l).group(1))*1000000)\n", + " elif l.startswith(\" cha\"):\n", + " self.unc_cha_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" mdf\"):\n", + " self.unc_mdf_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" imc\"):\n", + " self.unc_imc_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" cxlcm\"):\n", + " self.unc_cxlcm_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" cxldp\"):\n", + " self.unc_cxldp_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" mchbm\"):\n", + " self.unc_mchbm_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" m2hbm\"):\n", + " self.unc_m2hbm_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" pmem_fc\"):\n", + " self.unc_pmem_fc_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" pmem_mc\"):\n", + " self.unc_pmem_mc_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" m2m\"):\n", + " self.unc_m2m_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" qpi\"):\n", + " self.unc_qpi_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" r3qpi\"):\n", 
+ " self.unc_r3qpi_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" iio\"):\n", + " self.unc_iio_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" irp\"):\n", + " self.unc_irp_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" pcu\"):\n", + " self.unc_pcu_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" ubox\"):\n", + " self.unc_ubox_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" m2pcie\"):\n", + " self.unc_m2pcie_cnt=int(re.split(\" +\",l)[-1])*2\n", + " elif l.startswith(\" rdt\"):\n", + " self.unc_rdt_cnt=int(re.split(\" +\",l)[-1])*2\n", + " else:\n", + " raise Exception(\"Wrong, no emonv specified\")\n", + " \n", + " self.begin_clk=0\n", + " self.end_clk=0\n", + " \n", + " self.corecnt=self.totalcores\n", + " \n", + " self.emon_metrics=collections.OrderedDict({\n", + " 'emon_cpuutil':{\n", + " 'sum_func':self.cores_sum, \n", + " 'events':{\n", + " 'a':'CPU_CLK_UNHALTED.REF_TSC'\n", + " },\n", + " 'formula':{\n", + " 'cpu%':'a/({:f}*{:d})'.format(self.tsc,self.corecnt)\n", + " },\n", + " 'fmt':lambda l: F.round(l, 3)\n", + " },\n", + " 'emon_cpufreq':{\n", + " 'sum_func':self.cores_sum, \n", + " 'events':{\n", + " 'a':'CPU_CLK_UNHALTED.THREAD',\n", + " 'b':'CPU_CLK_UNHALTED.REF_TSC'\n", + " },\n", + " 'formula':{\n", + " 'cpu freq':'a/b*{:f}'.format(self.tsc/1000000)\n", + " },\n", + " 'fmt':lambda l: F.round(l, 3)\n", + " },\n", + " 'emon_instr_retired':{\n", + " 'sum_func':self.cores_sum, \n", + " 'events':{\n", + " 'a':'INST_RETIRED.ANY'\n", + " },\n", + " 'formula':{\n", + " 'pathlength':'a/1000000000'\n", + " },\n", + " 'fmt':lambda l: F.round(l, 0)\n", + " },\n", + " 'emon_ipc':{\n", + " 'sum_func':self.cores_sum, \n", + " 'events':{\n", + " 'a':'CPU_CLK_UNHALTED.THREAD',\n", + " 'b':'INST_RETIRED.ANY'\n", + " },\n", + " 'formula':{\n", + " 'ipc':'b/a'\n", + " },\n", + " 'fmt':lambda l: F.round(l, 3)\n", + " }\n", + " })\n", + " self.effective_metric=None\n", + " self.appclients=[] # 
there is no appid and client column\n", + "\n", + " def count_sum(self,collected_cores):\n", + " return F.expr('+'.join(['_{:d}/_2*{:d}'.format(c+3,self.tsc) for c in collected_cores]))\n", + "\n", + " def cores_sum(self,collected_cores):\n", + " return self.count_sum(collected_cores)\n", + "\n", + " def mem_sum(self,collected_cores):\n", + " return self.count_sum(collected_cores)\n", + "\n", + " def pcie_sum(self,collected_cores):\n", + " return self.count_sum([2,3,7,8])\n", + " \n", + " def list_metric(self):\n", + " if self.effective_metric is None:\n", + " self.get_effective_metric()\n", + " for k in self.effective_metric:\n", + " m=self.emon_metrics[k]\n", + " print(k)\n", + " for fk,fm in m['formula'].items():\n", + " print(\" \",fk)\n", + " \n", + " def load_data(self):\n", + " paths=os.path.split(self.file)\n", + " if fs.exists(paths[0]+\"/emon.parquet/_SUCCESS\"):\n", + " self.df=spark.read.parquet(paths[0]+\"/emon.parquet\")\n", + " self.df.cache()\n", + " return\n", + " \n", + " emondata=sc.textFile(self.file)\n", + " emondf=emondata.mapPartitions(splits_fill0).toDF()\n", + " emondf=emondf.withColumn(\"id\", F.monotonically_increasing_id())\n", + " giddf=emondf.where(emondf._1.rlike(\"======\")).selectExpr(\"id as g_id\")\n", + " \n", + " iddf=emondf.where(emondf._1.rlike(\"\\d\\d/\")).selectExpr(\"_1 as r_1\",\"_2 as r_2\",\"id as r_id\")\n", + " jfid=emondf.where(emondf._2.rlike(\"^[1-9][0-9][0-9]+\")).join(iddf,on=[emondf.id>iddf.r_id]).groupBy('id').agg(F.max('r_id').alias('r_id'))\n", + " iddf=iddf.join(jfid,on='r_id',how='left')\n", + " emondf=emondf.where(emondf._2.rlike(\"^[1-9][0-9][0-9]+\")).join(iddf,on='id',how='left')\n", + " \n", + " jfid=emondf.join(giddf,on=[emondf.id>giddf.g_id]).groupBy('id').agg(F.max('g_id').alias('g_id'))\n", + " giddf=giddf.join(jfid,on='g_id',how='left')\n", + " emondf=emondf.join(giddf,on='id',how='inner')\n", + " \n", + " df=emondf\n", + "\n", + " select_list = []\n", + " for idx, c in enumerate(df.columns):\n", 
+ " if idx >= 2 and c.startswith('_'):\n", + " select_list.append(col(c).cast(LongType()).alias(c))\n", + " else:\n", + " select_list.append(col(c))\n", + " df=df.select(select_list)\n", + "\n", + " df=df.withColumn(\"timestamp\",F.unix_timestamp(F.concat_ws(' ','r_1','r_2'),'MM/dd/yyyy HH:mm:ss')*F.lit(1000)+(F.split(F.col('r_2'),'\\.')[1]).astype(IntegerType()))\n", + " df=df.drop(\"r_1\")\n", + " df=df.drop(\"r_2\")\n", + " \n", + " cores=list(range(0,self.totalcores))\n", + " df=df.withColumn('sum',\n", + " F.when(F.col(\"_1\").startswith(\"UNC_IIO\"),self.pcie_sum(cores))\n", + " .otherwise(self.cores_sum(cores)))\n", + " if self.begin_clk>0 and self.end_clk>0:\n", + " df=df.withColumn('valid',((F.col(\"timestamp\")>F.lit(self.begin_clk)) & (F.col(\"timestamp\")0:\n", + " effective_metric.append(k)\n", + " progress.value=progress.value+1\n", + " self.effective_metric=effective_metric\n", + " emondf.unpersist()\n", + " \n", + " def gen_metric(self,emondf, m):\n", + " join_df=None\n", + " for alias,event in m['events'].items():\n", + " if join_df is None:\n", + " join_df=emondf.where(\"_1='{:s}'\".format(event)).select('timestamp','_1','_2','r_id','g_id',*self.appclients,F.col('sum').alias(alias))\n", + " else:\n", + " tdf=emondf.where(\"_1='{:s}'\".format(event)).select('_1','_2','r_id','g_id',*self.appclients,F.col('sum').alias(alias))\n", + " join_dft=join_df.join(tdf.drop('g_id'),on=['r_id',*self.appclients],how='inner')\n", + " if join_dft.count()==0:\n", + " join_df=join_df.join(tdf.drop('r_id'),on=['g_id',*self.appclients],how='inner')\n", + " else:\n", + " join_df=join_dft\n", + " return join_df\n", + "\n", + " \n", + " \n", + " def generate_trace_view_list(self,id=0, **kwargs):\n", + " trace_events=Analysis.generate_trace_view_list(self,id, **kwargs)\n", + " \n", + " cores=list(range(0,self.totalcores))\n", + " \n", + " emondf=self.df\n", + " if 'collected_cores' in kwargs:\n", + " cores=kwargs.get(\"collected_cores\",None)\n", + " 
emondf=emondf.withColumn('sum',\n", + " F.when(F.col(\"_1\").startswith(\"UNC_IIO\"),self.pcie_sum(cores))\n", + " .otherwise(self.cores_sum(cores)))\n", + " show_metric= kwargs.get('show_metric', None)\n", + " \n", + " if show_metric is None and self.effective_metric is None:\n", + " self.get_effective_metric()\n", + "\n", + " self.effective_metric=show_metric if show_metric is not None else self.effective_metric\n", + " \n", + " emondf=self.df\n", + " \n", + " tid=0\n", + " for k in self.effective_metric:\n", + " m=self.emon_metrics[k]\n", + " join_df=self.gen_metric(emondf,m)\n", + " rstdf=join_df.select(\n", + " F.lit(tid).alias('tid'),\n", + " F.lit(id).alias('pid'),\n", + " F.lit('C').alias('ph'),\n", + " F.lit(k).alias('name'),\n", + " (F.col('timestamp')-F.lit(self.starttime)).alias(\"ts\"),\n", + " F.struct(*[m['fmt'](F.expr(formula)).alias(col_name) for col_name,formula in m['formula'].items() ]).alias('args')\n", + " ).where(F.col(\"ts\").isNotNull()).orderBy('ts')\n", + " trace_events.extend(rstdf.toJSON().collect())\n", + " trace_events.append(json.dumps({\"name\": \"thread_sort_index\",\"ph\": \"M\",\"pid\":id,\"tid\":tid,\"args\":{\"sort_index \":tid}}))\n", + " tid=tid+1 \n", + "\n", + " return trace_events\n", + " \n", + " def show_emon_metric(self,metric,sub_metric,core,draw=True,metric_define=None, **kwargs):\n", + " if self.df==None:\n", + " self.load_data()\n", + " emondf=self.df\n", + " \n", + " showalltime=kwargs.get(\"showalltime\",True)\n", + " \n", + " if not showalltime:\n", + " emondf=emondf.filter(F.col(\"valid\")==F.lit(True))\n", + " \n", + " if metric is None or metric=='':\n", + " for k in self.effective_metric:\n", + " m=self.emon_metrics[k]\n", + " if sub_metric in m['formula']:\n", + " break\n", + " else:\n", + " print(\"can't find metric\",sub_metric)\n", + " return \n", + " else:\n", + " k=metric\n", + " if metric_define is None:\n", + " m= self.emon_metrics[k]\n", + " else:\n", + " m= metric_define[k]\n", + "\n", + " if 
type(core)==int:\n", + " core=[core,]\n", + " emondf=emondf.withColumn('sum',\n", + " F.when(F.col(\"_1\").startswith(\"UNC_IIO\"),self.pcie_sum(core))\n", + " .otherwise(self.count_sum(core)))\n", + " \n", + " join_df=self.gen_metric(emondf,m)\n", + " \n", + " rstdf=join_df.select(\n", + " F.col('timestamp').alias('ts'),\n", + " m['fmt'](F.expr(m['formula'][sub_metric])).alias(sub_metric),\n", + " 'r_id'\n", + " ).where(F.col(\"timestamp\").isNotNull()).orderBy('timestamp')\n", + " \n", + " metric_sum=rstdf.select(sub_metric).summary().toPandas()\n", + " display(metric_sum)\n", + " \n", + " if draw:\n", + " pddf=rstdf.toPandas()\n", + " pddf['ts']=(pddf['ts']-pddf.loc[0,'ts'])/1000\n", + " fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True,figsize=(30,8),gridspec_kw = {'width_ratios':[1, 5]})\n", + " plt.subplots_adjust(wspace=0.01)\n", + " sns.violinplot(y=sub_metric, data=pddf, ax=axs[0],palette=['g'])\n", + " axs[0].yaxis.grid(True, which='major')\n", + " ax=axs[1]\n", + " ax.stackplot(pddf['ts'], pddf[sub_metric],colors=['bisque'])\n", + " #ymin, ymax = ax.get_ylim()\n", + " ax2 = ax.twinx()\n", + " ax2.set_ylim(ax.get_ylim())\n", + " ax2.axhline(y=float(metric_sum.loc[4,sub_metric]), linewidth=2, color='r')\n", + " ax2.axhline(y=float(metric_sum.loc[5,sub_metric]), linewidth=2, color='r')\n", + " ax2.axhline(y=float(metric_sum.loc[6,sub_metric]), linewidth=2, color='r')\n", + " ax2.axhline(y=float(metric_sum.loc[7,sub_metric]), linewidth=2, color='r')\n", + " ax.set_xlabel('time (s)')\n", + " ax.yaxis.grid(True, which='major')\n", + " plt.show()\n", + " \n", + " hist_elapsedtime=rstdf.select('`{:s}`'.format(sub_metric)).rdd.flatMap(lambda x: x).histogram(15)\n", + " fig, axs = plt.subplots(figsize=(30, 5))\n", + " ax=axs\n", + " binSides, binCounts = hist_elapsedtime\n", + " binSides=[builtins.round(l,2) for l in binSides]\n", + "\n", + " N = len(binCounts)\n", + " ind = numpy.arange(N)\n", + " width = 0.5\n", + "\n", + " rects1 = ax.bar(ind+0.5, 
binCounts, width, color='b')\n", + "\n", + " ax.set_ylabel('Frequencies')\n", + " ax.set_title(sub_metric)\n", + " ax.set_xticks(numpy.arange(N+1))\n", + " ax.set_xticklabels(binSides)\n", + " return rstdf\n", + " \n", + "\n", + " def gen_reduce_metric(self,metric,core,sub_metric,agg_func):\n", + " if self.df==None:\n", + " self.load_data()\n", + " emondf=self.df\n", + " \n", + " emondf=emondf.where(F.col(\"valid\")==F.lit(True))\n", + " \n", + " k=metric\n", + " m= self.emon_metrics[k]\n", + "\n", + " if type(core)==int:\n", + " core=[core,]\n", + " \n", + " if len(core)(date.today() - timedelta(days=60)).strftime(\"%Y_%m_%d\"):\n", + " for r in fs.listdir(\"/gluten/\"+l):\n", + " if fs.exists(\"/gluten/\"+l+\"/\"+r+\"/app.log\"):\n", + " apps.append(\"/gluten/\"+l+\"/\"+r+\"/app.log\")\n", + " if currentdir not in apps:\n", + " apps.append(currentdir)\n", + " appdf=spark.read.json(apps)\n", + " appdf=appdf.withColumn(\"filename\", F.input_file_name())\n", + " starttime=appdf.where(\"Properties.`spark.app.name` like '\"+namelike+\"%' and Event='SparkListenerJobStart'\").select(\"filename\",F.col('Properties.`spark.app.name`').alias(\"appname\"),F.col('Submission Time').alias(\"starttime\"))\n", + " finishtime=appdf.where(\"Event='SparkListenerJobEnd'\").select(\"filename\",F.col('Completion Time').alias(\"finishtime\"))\n", + " starttime=starttime.groupBy(\"filename\").agg(F.max(\"appname\").alias(\"appname\"),F.min(\"starttime\").alias(\"starttime\"))\n", + " finishtime=finishtime.groupBy(\"filename\").agg(F.max(\"finishtime\").alias(\"finishtime\"))\n", + " elapsedtime=starttime.join(finishtime,\"filename\").orderBy(\"starttime\").select(F.date_format(F.from_unixtime(F.col('starttime')/1000),\"yyyy_MM_dd\").alias(\"test_date\"),(F.col(\"finishtime\")/1000-F.col(\"starttime\")/1000).alias(\"elapsedtime\"))\n", + " epsdf=elapsedtime.toPandas()\n", + " epsdf.plot(x='test_date',y=['elapsedtime'],style=\"-*\",figsize=(30,8))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "from pyspark.sql.functions import udf\n", + "@udf(\"long\")\n", + "def isfinish_udf(s):\n", + " import json\n", + " s=json.loads(s)\n", + " def isfinish(root):\n", + " if \"isFinalPlan=false\" in root['simpleString'] or root['children'] is None:\n", + " return 0\n", + " for c in root[\"children\"]:\n", + " if isfinish(c)==0:\n", + " return 0\n", + " return 1\n", + " if len(s)>0:\n", + " return isfinish(s[0])\n", + " else:\n", + " return 0\n", + " \n", + "@pandas_udf(\"taskid long, start long, dur long, name string\", PandasUDFType.GROUPED_MAP)\n", + "def time_breakdown(pdf):\n", + " ltime=pdf['Launch Time'][0]+2\n", + " pdf['start']=0\n", + " pdf['dur']=0\n", + " outpdf=[]\n", + " ratio=(pdf[\"Finish Time\"][0]-pdf[\"Launch Time\"][0])/pdf[\"Update\"].sum()\n", + " ratio=1 if ratio>1 else ratio\n", + " for idx,l in pdf.iterrows():\n", + " if(l[\"Update\"]*ratio>1):\n", + " outpdf.append([l[\"Task ID\"],ltime,int(l[\"Update\"]*ratio),l[\"mname\"]])\n", + " ltime=ltime+int(l[\"Update\"]*ratio)\n", + " if len(outpdf)>0:\n", + " return pandas.DataFrame(outpdf)\n", + " else:\n", + " return pandas.DataFrame({'taskid': pandas.Series([], dtype='long'),\n", + " 'start': pandas.Series([], dtype='long'),\n", + " 'dur': pandas.Series([], dtype='long'),\n", + " 'name': pandas.Series([], dtype='str'),\n", + " })\n", + " \n", + "class App_Log_Analysis(Analysis):\n", + " def __init__(self, file, jobids):\n", + " Analysis.__init__(self,file)\n", + " self.jobids=[] if jobids is None else [str(l) for l in jobids]\n", + " self.df=None\n", + " self.pids=[]\n", + " \n", + " def load_data(self):\n", + " print(\"load data \", self.file)\n", + " jobids=self.jobids\n", + " df=spark.read.json(self.file)\n", + " \n", + " if 'App ID' in df.columns:\n", + " self.appid=df.where(\"`App ID` is not null\").collect()[0][\"App ID\"]\n", + " else:\n", + " 
self.appid=\"Application-00000000\"\n", + " \n", + " if df.where(\"Event='org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates'\").count()>0:\n", + " self.dfacc=df.where(\"Event='org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates'\").select(F.col(\"executionId\").alias(\"queryid\"),F.explode(\"accumUpdates\"))\n", + " else:\n", + " self.dfacc = None\n", + " \n", + " if \"sparkPlanInfo\" in df.columns:\n", + " self.queryplans=df.where(\"(Event='org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart' or Event='org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate') \\\n", + " and (sparkPlanInfo.nodeName!='AdaptiveSparkPlan' or sparkPlanInfo.simpleString='AdaptiveSparkPlan isFinalPlan=true') \").select(F.col(\"executionId\").alias(\"queryid\"),'physicalPlanDescription',\"sparkPlanInfo.*\")\n", + " else:\n", + " self.queryplans=None\n", + " \n", + " seen = set()\n", + " \n", + " if self.queryplans is not None:\n", + " self.queryplans=self.queryplans.where(isfinish_udf(F.to_json(\"children\"))==1)\n", + " \n", + " self.allmetrics=[]\n", + " if self.queryplans.count() > 0:\n", + " metrics=self.queryplans.collect()\n", + " def get_metric(root):\n", + " for l in root[\"metrics\"]:\n", + " if l['accumulatorId'] not in seen:\n", + " seen.add(l['accumulatorId'])\n", + " self.allmetrics.append([l['accumulatorId'],l[\"metricType\"],l['name'],root[\"nodeName\"]])\n", + " if root['children'] is not None:\n", + " for c in root[\"children\"]:\n", + " get_metric(c)\n", + " for c in metrics:\n", + " get_metric(c)\n", + " \n", + " amsdf=spark.createDataFrame(self.allmetrics)\n", + " amsdf=amsdf.withColumnRenamed(\"_1\",\"ID\").withColumnRenamed(\"_2\",\"type\").withColumnRenamed(\"_3\",\"Name\").withColumnRenamed(\"_4\",\"nodeName\")\n", + " \n", + " \n", + " if self.dfacc is not None:\n", + " 
self.dfacc=self.dfacc.select(\"queryid\",(F.col(\"col\")[0]).alias(\"ID\"),(F.col(\"col\")[1]).alias(\"Update\")).join(amsdf,on=[\"ID\"])\n", + " \n", + " if self.queryplans is not None:\n", + " self.metricscollect=[l for l in self.allmetrics if l[1] in ['nsTiming','timing'] and (l[2].startswith(\"time to\") or l[2].startswith(\"time of\") or l[2].startswith(\"scan time\") or l[2].startswith(\"shuffle write time\") or l[2].startswith(\"time to spill\") or l[2].startswith(\"task commit time\")) \n", + " and l[2] not in(\"time to collect batch\", \"time of scan\") ]\n", + " \n", + " #config=df.where(\"event='SparkListenerJobStart' and Properties.`spark.executor.cores` is not null\").select(\"Properties.*\").limit(1).collect()\n", + " config=df.select(\"`Spark Properties`.*\").where(\"`spark.app.id` is not null\").limit(1).collect()\n", + " \n", + " configdic=config[0].asDict()\n", + " self.parallelism=int(configdic['spark.sql.shuffle.partitions']) if 'spark.sql.shuffle.partitions' in configdic else 1\n", + " self.executor_cores=int(configdic['spark.executor.cores']) if 'spark.executor.cores' in configdic else 1\n", + " self.executor_instances=int(configdic['spark.executor.instances']) if 'spark.executor.instances' in configdic else 1\n", + " self.taskcpus= int(configdic['spark.task.cpus'])if 'spark.task.cpus' in configdic else 1\n", + " self.batchsize= int(configdic['spark.gluten.sql.columnar.maxBatchSize'])if 'spark.gluten.sql.columnar.maxBatchSize' in configdic else 4096\n", + " \n", + " self.realexecutors = df.where(~F.isnull(F.col(\"Executor ID\"))).select(\"Executor ID\").distinct().count()\n", + " \n", + " execstart = df.where(\"Event='org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart'\").select(\"executionId\",\"time\")\n", + " execend = df.where(\"Event='org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd'\").select(\"executionId\",\"time\")\n", + " 
execstart=execstart.withColumnRenamed(\"time\",\"query_starttime\").withColumnRenamed(\"executionId\",\"queryid\")\n", + " execend=execend.withColumnRenamed(\"time\",\"query_endtime\").withColumnRenamed(\"executionId\",\"queryid\")\n", + " exectime = execstart.join(execend,on=[\"queryid\"])\n", + "\n", + " if \"spark.sql.execution.id\" in df.where(\"Event='SparkListenerJobStart'\").select(\"Properties.*\").columns:\n", + " df_jobstart=df.where(\"Event='SparkListenerJobStart'\").select(\"Job ID\",\"Submission Time\",F.col(\"Properties.`spark.sql.execution.id`\").alias(\"queryid\"),\"Stage IDs\")\n", + " else:\n", + " df_jobstart=df.where(\"Event='SparkListenerJobStart'\").select(\"Job ID\",\"Submission Time\",F.lit(0).alias(\"queryid\"),\"Stage IDs\")\n", + " \n", + " df_jobend=df.where(\"Event='SparkListenerJobEnd'\").select(\"`Job ID`\",\"Completion Time\")\n", + " df_job=df_jobstart.join(df_jobend,\"Job ID\")\n", + " df_job=df_job.withColumnRenamed(\"Submission Time\",\"job_start_time\")\n", + " df_job=df_job.withColumnRenamed(\"Completion Time\",\"job_stop_time\")\n", + " self.df_job=df_job\n", + " \n", + " jobstage=df_job.select(\"*\",F.explode(\"Stage IDs\").alias(\"Stage ID\"))\n", + " task=df.where(\"(Event='SparkListenerTaskEnd' or Event='SparkListenerTaskStart') \").select(\"Event\",\"Stage ID\",\"task info.*\",\"task metrics.*\")\n", + " \n", + " self.failed_stages = [str(l['Stage ID']) for l in task.where(\"Failed='true'\").select(\"Stage ID\").distinct().collect()]\n", + " \n", + " self.speculativetask = task.where(\"speculative = 'true'\").count()\n", + " self.speculativekilledtask = task.where(\"speculative = true and killed='true'\").count()\n", + " self.speculativestage = task.where(\"speculative = true and killed='true'\").select(\"`Stage ID`\").distinct().count()\n", + " \n", + " validtsk = task.where(\"Event = 'SparkListenerTaskEnd' and (Failed<>'true' or killed<>'true')\").select(\"`Task ID`\")\n", + " task=task.join(validtsk,on='Task 
ID',how='inner')\n", + " \n", + " taskjob=task.\\\n", + " select(\"Host\",\"`Event`\",\"`Launch Time`\",\"`Executor ID`\",\"`Task ID`\",\"`Finish Time`\",\n", + " \"`Stage ID`\",\"`Input Metrics`.`Bytes Read`\",\"`Disk Bytes Spilled`\",\"`Memory Bytes Spilled`\",\"`Shuffle Read Metrics`.`Local Bytes Read`\",\"`Shuffle Read Metrics`.`Remote Bytes Read`\",\n", + " \"`Shuffle Write Metrics`.`Shuffle Bytes Written`\",\"`Executor Deserialize Time`\",\"`Shuffle Read Metrics`.`Fetch Wait Time`\",\"`Executor Run Time`\",\"`Shuffle Write Metrics`.`Shuffle Write Time`\",\n", + " \"`Result Serialization Time`\",\"`Getting Result Time`\",\"`JVM GC Time`\",\"`Executor CPU Time`\",\"Accumulables\",\"Peak Execution Memory\",\n", + " F.when(task['Finish Time']==0,task['Launch Time']).otherwise(task['Finish Time']).alias('eventtime')\n", + " ).join(jobstage,\"Stage ID\").where(\"`Finish Time` is null or `Finish Time` <=job_stop_time+5\")\n", + " \n", + " taskjob = taskjob.join(exectime,on=['queryid'],how='left')\n", + " \n", + " self.df=taskjob\n", + " \n", + " if len(jobids)>0:\n", + " self.df=self.df.where('`Job ID` in ({:s})'.format(','.join(jobids)))\n", + " \n", + " queryids=self.df.select(F.col(\"queryid\").astype(IntegerType())).distinct().where(\"queryid is not null\").orderBy(\"queryid\").toPandas()\n", + " \n", + " self.query_num=len(queryids)\n", + " if self.query_num>0:\n", + " queryidx=queryids.reset_index()\n", + " queryidx['index']=queryidx['index']+1\n", + " #tpcds query\n", + " if self.query_num==103:\n", + " queryidx['index']=queryidx['index'].map(tpcds_query_map)\n", + " qidx=spark.createDataFrame(queryidx)\n", + " qidx=qidx.withColumnRenamed(\"index\",\"real_queryid\")\n", + " self.df=self.df.join(qidx,on=\"queryid\",how=\"left\")\n", + " if self.dfacc is not None:\n", + " self.dfacc=self.dfacc.join(qidx,on=\"queryid\",how='left')\n", + "\n", + " if self.queryplans:\n", + " self.queryplans=self.queryplans.join(qidx,\"queryid\",how=\"right\")\n", + " \n", + " 
self.df=self.df.fillna(0)\n", + " self.df=self.df.withColumn('Executor ID',F.when(F.col(\"Executor ID\")==\"driver\",1).otherwise(F.col(\"Executor ID\")))\n", + " self.df.cache()\n", + " \n", + " \n", + " \n", + " ##############################\n", + " \n", + " dfx=self.df.where(\"Event='SparkListenerTaskEnd'\").select(\"Stage ID\",\"Launch Time\",\"Finish Time\",\"Task ID\")\n", + " dfxpds=dfx.toPandas()\n", + " dfxpds.columns=[l.replace(\" \",\"_\") for l in dfxpds.columns]\n", + " dfxpds_ods=sqldf('''select * from dfxpds order by finish_time desc''')\n", + " criticaltasks=[]\n", + " idx=0\n", + " prefinish=0\n", + " launchtime=dfxpds_ods[\"Launch_Time\"][0]\n", + " criticaltasks.append([dfxpds_ods[\"Task_ID\"][0],launchtime,dfxpds_ods[\"Finish_Time\"][0]])\n", + " total_row=len(dfxpds_ods)\n", + "\n", + " while True:\n", + " while idx=launchtime else cur_finish\n", + " launchtime=dfxpds_ods[\"Launch_Time\"][idx]\n", + " criticaltasks.append([dfxpds_ods[\"Task_ID\"][idx],launchtime,cur_finish])\n", + " self.criticaltasks=criticaltasks\n", + "\n", + " def get_physical_plan(appals,**kwargs):\n", + " if appals.df is None:\n", + " appals.load_data()\n", + " queryid=kwargs.get('queryid',None)\n", + " shownops=kwargs.get(\"shownops\",['ArrowRowToColumnarExec','ColumnarToRow','RowToArrowColumnar',\n", + " 'VeloxNativeColumnarToRowExec','ArrowColumnarToRow','Filter','HashAggregate','Project','SortAggregate','SortMergeJoin','window'])\n", + " \n", + " desensitization=kwargs.get('desensitization',True)\n", + " \n", + " def get_fields(colss):\n", + " lvls=0\n", + " colns=[]\n", + " ks=\"\"\n", + " for c in colss:\n", + " if c==\",\" and lvls==0:\n", + " colns.append(ks)\n", + " ks=\"\"\n", + " continue\n", + " if c==\" \" and ks==\"\":\n", + " continue\n", + " if c==\"(\":\n", + " lvls+=1\n", + " if c==\")\":\n", + " lvls-=1\n", + " ks+=c\n", + " if ks!=\"\":\n", + " colns.append(ks)\n", + " return colns\n", + " \n", + " def get_column_names(s, opname, resultname, prefix, 
columns, funcs):\n", + " p=re.search(r\" \"+opname+\" \",s[0])\n", + " if p:\n", + " for v in s[1].split(\"\\n\"):\n", + " if v.startswith(resultname):\n", + " cols=re.search(\"\\[([^0-9].+)\\]\",v)\n", + " if cols:\n", + " colss=cols.group(1)\n", + " colns=get_fields(colss)\n", + " if opname+str(len(columns)) not in funcs:\n", + " funcs[opname+str(len(columns))]=[]\n", + " funcs[opname+str(len(columns))].extend(colns)\n", + " for c in colns:\n", + " if \" AS \" in c:\n", + " c=re.sub(\"#\\d+L*\",\"\",c)\n", + " colname=re.search(r\" AS (.+)\",c).group(1)\n", + " if colname not in columns:\n", + " columns[colname]=prefix\n", + " \n", + " plans=appals.queryplans.select('real_queryid','physicalPlanDescription').collect() if queryid is None else appals.queryplans.where(f\"real_queryid='{queryid}'\").select(\"physicalPlanDescription\").collect()\n", + " \n", + " for pr in range(0,len(plans)):\n", + " plan=plans[pr]['physicalPlanDescription']\n", + " nodes={}\n", + " lines=plan.split(\"\\n\")\n", + " for idx in range(0,len(lines)):\n", + " l=lines[idx]\n", + " if l=='+- == Final Plan ==':\n", + " while l!='+- == Initial Plan ==':\n", + " idx+=1\n", + " l=lines[idx]\n", + " if not l.endswith(\")\"):\n", + " break\n", + " idv=re.search(\"\\(\\d+\\)$\",l).group(0)\n", + " nodes[idv]=[l]\n", + " if l==\"== Physical Plan ==\":\n", + " while not lines[idx+1].startswith(\"(\"):\n", + " idx+=1\n", + " l=lines[idx]\n", + " if not l.endswith(\")\"):\n", + " break\n", + " idv=re.search(\"\\(\\d+\\)$\",l).group(0)\n", + " nodes[idv]=[l]\n", + " \n", + " if l.startswith(\"(\"):\n", + " idv=re.search(\"^\\(\\d+\\)\",l).group(0)\n", + " if idv in nodes:\n", + " desc=\"\"\n", + " while l.strip()!=\"\":\n", + " desc+=l+\"\\n\"\n", + " idx+=1\n", + " l=lines[idx]\n", + " desc=re.sub(r\"#\\d+L*\",r\"\",desc)\n", + " desc=re.sub(r\"= [^)]+\",r\"=\",desc)\n", + " desc=re.sub(r\"IN \\([^)]\\)\",r\"IN ()\",desc)\n", + " desc=re.sub(r\"In\\([^)]\\)\",r\"In()\",desc)\n", + " 
desc=re.sub(r\"EqualTo\\(([^,]+),[^)]+\\)\",r\"EqualTo(\\1,)\",desc)\n", + " desc=re.sub(r\"搜索广告\",r\"xxx\",desc)\n", + " ## add all keyword replace here\n", + " nodes[idv].append(desc)\n", + " tables={}\n", + " columns={}\n", + " functions={}\n", + " for s in nodes.values():\n", + " p=re.search(r\"Scan arrow [^.]*\\.([^ ]+)\",s[0])\n", + " if p:\n", + " tn=p.group(1)\n", + " if not tn in tables:\n", + " tables[tn]=\"table\"\n", + " if desensitization:\n", + " s[0]=s[0].replace(tn,tables[tn])\n", + " s[1]=s[1].replace(tn,tables[tn])\n", + " colsv=[]\n", + " schema=[]\n", + " for v in s[1].split(\"\\n\"):\n", + " if v.startswith(\"ReadSchema\"):\n", + " cols=re.search(\"<(.*)>\",v)\n", + " if cols:\n", + " colss=cols.group(1).split(\",\")\n", + " for c in colss:\n", + " cts=c.split(\":\")\n", + " ct=cts[0]\n", + " if not ct in columns:\n", + " if len(cts)==2:\n", + " cts[1]=cts[1]\n", + " columns[ct]=cts[1]+\"_\"\n", + " else:\n", + " columns[ct]=\"c_\"\n", + " if v.startswith(\"Location\") and desensitization:\n", + " s[1]=s[1].replace(v+\"\\n\",\"\")\n", + " \n", + " get_column_names(s, \"Project\", \"Output\", \"proj_\", columns, functions)\n", + " get_column_names(s, \"HashAggregate\", \"Results\", \"shagg_\", columns, functions)\n", + " get_column_names(s, \"SortAggregate\", \"Results\", \"stagg_\", columns, functions)\n", + " get_column_names(s, \"ColumnarConditionProject\", \"Arguments\", \"cproj_\", columns, functions)\n", + " get_column_names(s, \"ColumnarHashAggregate\", \"Results\", \"cshagg_\", columns, functions)\n", + " get_column_names(s, \"Window\", \"Arguments\", \"window_\", columns, functions)\n", + "\n", + " keys=[]\n", + " ckeys=list(columns.keys())\n", + " for l in range(0,len(ckeys)):\n", + " k1=ckeys[l]\n", + " for k in range(0,len(keys)):\n", + " if keys[k] in k1:\n", + " keys.insert(k,k1)\n", + " break\n", + " else:\n", + " keys.append(k1)\n", + " \n", + " for s in nodes.values():\n", + " s[1]=html.escape(s[1])\n", + " if 
desensitization:\n", + " for c in keys:\n", + " v=columns[c]\n", + " if v.startswith(\"array\") or v.startswith(\"map\") or v.startswith(\"struct\"):\n", + " s[1]=re.sub(c, ''+html.escape(v)+\"\",s[1])\n", + " else:\n", + " s[1]=re.sub(c, \"\"+html.escape(v)+\"\",s[1])\n", + "\n", + "\n", + " htmls=['''''']\n", + " qid=pr+1 if queryid is None else queryid\n", + " htmls.append(f\"\")\n", + " for l in nodes.values():\n", + " if shownops is not None:\n", + " for k in shownops:\n", + " if \" \"+k+\" \" in l[0]:\n", + " break\n", + " else:\n", + " continue\n", + " htmls.append(\"\")\n", + " htmls.append('\")\n", + " htmls.append('\")\n", + " htmls.append(\"\")\n", + " htmls.append(\"
{qid}
')\n", + " htmls.append(l[0].replace(\" \",\"_\")\n", + " .replace(\"ColumnarToRow\",\"ColumnarToRow\")\n", + " .replace(\"RowToArrowColumnar\",\"RowToArrowColumnar\")\n", + " .replace(\"ArrowColumnarToRow\",\"ArrowColumnarToRow\")\n", + " .replace(\"ArrowRowToColumnar\",\"ArrowRowToColumnar\")\n", + " .replace(\"VeloxNativeColumnarToRowExec\",\"VeloxNativeColumnarToRowExec\")\n", + " )\n", + " htmls.append(\"
')\n", + " ls=l[1].split(\"\\n\")\n", + " lsx=[]\n", + " for t in ls:\n", + " cols=re.search(\"\\[([^0-9].+)\\]\",t)\n", + " if cols:\n", + " colss=cols.group(1)\n", + " colns=get_fields(colss)\n", + " t=re.sub(\"\\[([^0-9].+)\\]\",\"\",t)\n", + " t+=\"[\"+';'.join(colns)+\"]\" \n", + " if \":\" in t:\n", + " lsx.append(re.sub(r'^([^:]+:)',r'\\1',t))\n", + " else:\n", + " lsx.append(t)\n", + " htmls.append(\"
\".join(lsx))\n", + " htmls.append(\"
\")\n", + " display(HTML(\"\\n\".join(htmls)))\n", + " \n", + " for k, v in functions.items():\n", + " functions[k]=[l for l in v if \"(\" in l]\n", + " for f in functions.values():\n", + " for idx in range(0,len(f)):\n", + " for c in keys:\n", + " v=columns[c]\n", + " if v.startswith(\"array\") or v.startswith(\"map\") or v.startswith(\"struct\"):\n", + " f[idx]=re.sub(c, ''+html.escape(v)+\"\",f[idx])\n", + " else:\n", + " f[idx]=re.sub(c, \"\"+html.escape(v)+\"\",f[idx])\n", + " funchtml=\"\"\n", + " for k,v in functions.items():\n", + " if shownops is not None:\n", + " for ks in shownops:\n", + " if \" \"+ks+\" \" in k:\n", + " break\n", + " else:\n", + " continue\n", + " funchtml+=\"\"\n", + " funchtml+=\"
\"+k+''\n", + " for f in v:\n", + " funchtml+='\"\n", + " funchtml+=\"
'+f+\"
\" \n", + " display(HTML(funchtml))\n", + " \n", + " return plans\n", + " \n", + " def get_physical_allnodes(appals,**kwargs):\n", + " if appals.df is None:\n", + " appals.load_data()\n", + " queryid=None\n", + " \n", + " plans=appals.queryplans.select('real_queryid','physicalPlanDescription').collect() if queryid is None else appals.queryplans.where(f\"real_queryid='{queryid}'\").select(\"physicalPlanDescription\").collect()\n", + " \n", + " allnodes={}\n", + " for pr in range(0,len(plans)):\n", + " plan=plans[pr]['physicalPlanDescription']\n", + " allnodes[pr]={}\n", + " nodes=allnodes[pr]\n", + " if plan is None:\n", + " continue\n", + " lines=plan.split(\"\\n\")\n", + " for idx in range(0,len(lines)):\n", + " l=lines[idx]\n", + " if l=='+- == Final Plan ==':\n", + " while l!='+- == Initial Plan ==':\n", + " idx+=1\n", + " l=lines[idx]\n", + " if not l.endswith(\")\"):\n", + " break\n", + " idv=re.search(\"\\(\\d+\\)$\",l).group(0)\n", + " nodes[idv]=[l]\n", + " if l.startswith(\"(\"):\n", + " idv=re.search(\"^\\(\\d+\\)\",l).group(0)\n", + " if idv in nodes:\n", + " desc=\"\"\n", + " while l!=\"\":\n", + " desc+=l+\"\\n\"\n", + " idx+=1\n", + " l=lines[idx]\n", + " nodes[idv].append(desc)\n", + " return allnodes\n", + " \n", + " \n", + " def get_basic_state(appals):\n", + " if appals.df is None:\n", + " appals.load_data()\n", + " display(HTML(f\"http://{localhost}:18080/history/{appals.appid}\"))\n", + " \n", + " errorcolor=\"#000000\" if appals.executor_instances == appals.realexecutors else \"#c0392b\"\n", + " \n", + " qtime=appals.get_query_time(plot=False)\n", + " sums=qtime.sum()\n", + " \n", + " total_rchar,total_wchar,total_read_bytes,total_write_bytes,total_cancelled_write_bytes = getexecutor_stat(appals.file[:-len(\"app.log\")])\n", + " \n", + " if len(appals.failed_stages)>0:\n", + " failure=\"
\".join([\"query: \" + str(l[\"real_queryid\"])+\"|stage: \" + str(l[\"Stage ID\"]) for l in appals.df.where(\"`Stage ID` in (\"+\",\".join(appals.failed_stages)+\")\").select(\"real_queryid\",\"Stage ID\").distinct().collect()])\n", + " else:\n", + " failure=\"\"\n", + " \n", + " stats={\"appid\":appals.appid,\n", + " \"executor.instances\":appals.executor_instances,\n", + " \"executor.cores\":appals.executor_cores,\n", + " \"shuffle.partitions\":appals.parallelism,\n", + " \"batch size\":appals.batchsize,\n", + " \"real executors\":appals.realexecutors,\n", + " \"Failed Tasks\":failure,\n", + " \"Speculative Tasks\":appals.speculativetask,\n", + " \"Speculative Killed Tasks\":appals.speculativekilledtask,\n", + " \"Speculative Stage\":appals.speculativestage,\n", + " \"runtime\":round(sums['runtime'],2),\n", + " \"disk spilled\":round(sums['disk spilled'],2),\n", + " \"memspilled\":round(sums['memspilled'],2),\n", + " \"local_read\":round(sums['local_read'],2),\n", + " \"remote_read\":round(sums['remote_read'],2),\n", + " \"shuffle_write\":round(sums['shuffle_write'],2),\n", + " \"task run time\":round(sums['run_time'],2),\n", + " \"ser_time\":round(sums['ser_time'],2),\n", + " \"f_wait_time\":round(sums['f_wait_time'],2),\n", + " \"gc_time\":round(sums['gc_time'],2),\n", + " \"input read\":round(sums['input read'],2),\n", + " \"acc_task_time\":round(sums['acc_task_time'],2),\n", + " \"file read size\":round(total_rchar,2),\n", + " \"file write size\":round(total_wchar,2),\n", + " \"disk read size\":round(total_read_bytes,2),\n", + " \"disk write size\":round(total_write_bytes,2),\n", + " \"disk cancel size\":round(total_cancelled_write_bytes,2)\n", + " }\n", + " \n", + " display(HTML(f'''\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
appid{appals.appid}
executor.instances{appals.executor_instances}
executor.cores{appals.executor_cores}
shuffle.partitions{(appals.parallelism)}
batch size{(appals.batchsize):,}
real executors{(appals.realexecutors)}
Failed Tasks{(failure)}
Speculative Tasks{(appals.speculativetask)}
Speculative Killed Tasks{(appals.speculativekilledtask)}
Speculative Stage{(appals.speculativestage)}
runtime{round(sums['runtime'],2):,}
disk spilled{round(sums['disk spilled'],2):,}
memspilled{round(sums['memspilled'],2):,}
local_read{round(sums['local_read'],2):,}
remote_read{round(sums['remote_read'],2):,}
shuffle_write{round(sums['shuffle_write'],2):,}
task run time{round(sums['run_time'],2):,}
ser_time{round(sums['ser_time'],2):,}
f_wait_time{round(sums['f_wait_time'],2):,}
gc_time{round(sums['gc_time'],2):,}
input read{round(sums['input read'],2):,}
acc_task_time{round(sums['acc_task_time'],2):,}
file read size{round(total_rchar,2):,}
file write size{round(total_wchar,2):,}
disk read size{round(total_read_bytes,2):,}
disk write size{round(total_write_bytes,2):,}
disk cancel size{round(total_cancelled_write_bytes,2):,}
\n", + "\n", + " '''))\n", + " return stats\n", + " \n", + " \n", + " def generate_trace_view_list_exec(self,id=0,**kwargs):\n", + " Analysis.generate_trace_view_list(self,**kwargs)\n", + " showcpu=kwargs.get('showcpu',False)\n", + " shownodes=kwargs.get(\"shownodes\",None)\n", + " \n", + " showdf=self.df.where(F.col(\"Host\").isin(shownodes)) if shownodes else self.df\n", + " \n", + " events=showdf.toPandas()\n", + " coretrack={}\n", + " trace_events=[]\n", + " starttime=self.starttime\n", + " taskend=[]\n", + " trace={\"traceEvents\":[]}\n", + " exec_hosts={}\n", + " hostsdf=showdf.select(\"Host\").distinct().orderBy(\"Host\")\n", + " hostid=100000\n", + " ended_event=[]\n", + " \n", + " for i,l in hostsdf.toPandas().iterrows():\n", + " exec_hosts[l['Host']]=hostid\n", + " hostid=hostid+100000\n", + "\n", + " for idx,l in events.iterrows():\n", + " if l['Event']=='SparkListenerTaskStart':\n", + " hostid=exec_hosts[l['Host']]\n", + "\n", + " tsk=l['Task ID']\n", + " pid=int(l['Executor ID'])*100+hostid\n", + " self.pids.append(pid)\n", + " stime=l['Launch Time']\n", + " #the task's starttime and finishtime is the same, ignore it.\n", + " if tsk in ended_event:\n", + " continue\n", + " if not pid in coretrack:\n", + " tids={}\n", + " trace_events.append({\n", + " \"name\": \"process_name\",\n", + " \"ph\": \"M\",\n", + " \"pid\":pid,\n", + " \"tid\":0,\n", + " \"args\":{\"name\":\"{:s}.{:s}\".format(l['Host'],l['Executor ID'])}\n", + " })\n", + "\n", + " else:\n", + " tids=coretrack[pid]\n", + " for t in tids.keys():\n", + " if tids[t][0]==-1:\n", + " tids[t]=[tsk,stime]\n", + " break\n", + " else:\n", + " t=len(tids)\n", + " tids[t]=[tsk,stime]\n", + " #print(\"task {:d} tid is {:s}.{:d}\".format(tsk,pid,t))\n", + " coretrack[pid]=tids\n", + "\n", + " if l['Event']=='SparkListenerTaskEnd':\n", + " sevt={}\n", + " eevt={}\n", + " hostid=exec_hosts[l['Host']]\n", + " pid=int(l['Executor ID'])*100+hostid\n", + " tsk=l['Task ID']\n", + " fintime=l['Finish Time']\n", + 
"\n", + " tids=coretrack[pid]\n", + " for t in tids.keys():\n", + " if tids[t][0]==tsk:\n", + " tids[t]=[-1,-1]\n", + " break\n", + " else:\n", + " ended_event.append(tsk)\n", + " continue\n", + " for ps in reversed([key for key in tids.keys()]) :\n", + " if tids[ps][1]-fintime<0 and tids[ps][1]-fintime>=-2:\n", + " fintime=tids[ps][1]\n", + " tids[t]=tids[ps]\n", + " tids[ps]=[-1,-1]\n", + " break\n", + " if starttime==0:\n", + " starttime=l['Launch Time']\n", + " print(f'applog start time: {starttime}')\n", + "\n", + " sstime=l['Launch Time']-starttime\n", + "\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':sstime,\n", + " 'dur':fintime-l['Launch Time'],\n", + " 'pid':pid,\n", + " \"ph\":'X',\n", + " 'name':\"stg{:d}\".format(l['Stage ID']),\n", + " 'args':{\"job id\": l['job id'],\n", + " \"stage id\": l['Stage ID'],\n", + " \"tskid\":tsk,\n", + " \"input\":builtins.round(l[\"Bytes Read\"]/1024/1024,2),\n", + " \"spill\":builtins.round(l[\"Memory Bytes Spilled\"]/1024/1024,2),\n", + " \"Shuffle Read Metrics\": \"\",\n", + " \"|---Local Read\": builtins.round(l[\"Local Bytes Read\"]/1024/1024,2),\n", + " \"|---Remote Read\":builtins.round(l[\"Remote Bytes Read\"]/1024/1024,2),\n", + " \"Shuffle Write Metrics\": \"\",\n", + " \"|---Write\":builtins.round(l['Shuffle Bytes Written']/1024/1024,2)\n", + " }\n", + " })\n", + "\n", + " des_time=l['Executor Deserialize Time']\n", + " read_time=l['Fetch Wait Time']\n", + " exec_time=l['Executor Run Time']\n", + " write_time=math.floor(l['Shuffle Write Time']/1000000)\n", + " ser_time=l['Result Serialization Time']\n", + " getrst_time=l['Getting Result Time']\n", + " durtime=fintime-sstime-starttime;\n", + "\n", + " times=[0,des_time,read_time,exec_time,write_time,ser_time,getrst_time]\n", + " time_names=['sched delay','deserialize time','read time','executor time','write time','serialize time','result time']\n", + " evttime=reduce((lambda x, y: x + y),times)\n", + " if evttime>durtime:\n", + " 
times=[math.floor(l*1.0*durtime/evttime) for l in times]\n", + " else:\n", + " times[0]=durtime-evttime\n", + "\n", + " esstime=sstime\n", + " for idx in range(0,len(times)):\n", + " if times[idx]>0:\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':esstime,\n", + " 'dur':times[idx], \n", + " 'pid':pid,\n", + " 'ph':'X',\n", + " 'name':time_names[idx]})\n", + " if idx==3:\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':esstime,\n", + " 'dur':l['JVM GC Time'],\n", + " 'pid':pid,\n", + " 'ph':'X',\n", + " 'name':'GC Time'})\n", + " if showcpu:\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':esstime,\n", + " 'pid':pid,\n", + " 'ph':'C',\n", + " 'name':'cpu% {:d}'.format(pid+int(t)),\n", + " 'args':{'value':l['Executor CPU Time']/1000000.0/times[idx]}})\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':esstime+times[idx],\n", + " 'pid':pid,\n", + " 'ph':'C',\n", + " 'name':'cpu% {:d}'.format(pid+int(t)),\n", + " 'args':{'value':0}})\n", + " esstime=esstime+times[idx]\n", + " self.starttime=starttime\n", + " return [json.dumps(l) for l in trace_events]\n", + "\n", + " def generate_trace_view_list(self,id=0,**kwargs):\n", + " Analysis.generate_trace_view_list(self,**kwargs)\n", + " showcpu=kwargs.get('showcpu',False)\n", + " shownodes=kwargs.get(\"shownodes\",None)\n", + " \n", + " showdf=self.df.where(F.col(\"Host\").isin(shownodes)) if shownodes else self.df\n", + " \n", + " showdf=showdf.orderBy([\"eventtime\", \"Finish Time\"], ascending=[1, 0])\n", + " \n", + " events=showdf.drop(\"Accumulables\").toPandas()\n", + " coretrack={}\n", + " trace_events=[]\n", + " starttime=self.starttime\n", + " taskend=[]\n", + " trace={\"traceEvents\":[]}\n", + " exec_hosts={}\n", + " hostsdf=showdf.select(\"Host\").distinct().orderBy(\"Host\")\n", + " hostid=100000\n", + " ended_event=[]\n", + " \n", + " for i,l in hostsdf.toPandas().iterrows():\n", + " exec_hosts[l['Host']]=hostid\n", + " 
hostid=hostid+100000\n", + "\n", + " tskmap={}\n", + " for idx,l in events.iterrows():\n", + " if l['Event']=='SparkListenerTaskStart':\n", + " hostid=exec_hosts[l['Host']]\n", + "\n", + " tsk=l['Task ID']\n", + " pid=int(l['Executor ID'])*100+hostid\n", + " self.pids.append(pid)\n", + " stime=l['Launch Time']\n", + " #the task's starttime and finishtime is the same, ignore it.\n", + " if tsk in ended_event:\n", + " continue\n", + " if not pid in coretrack:\n", + " tids={}\n", + " trace_events.append({\n", + " \"name\": \"process_name\",\n", + " \"ph\": \"M\",\n", + " \"pid\":pid,\n", + " \"tid\":0,\n", + " \"args\":{\"name\":\"{:s}.{:s}\".format(l['Host'],l['Executor ID'])}\n", + " })\n", + "\n", + " else:\n", + " tids=coretrack[pid]\n", + " for t in tids.keys():\n", + " if tids[t][0]==-1:\n", + " tids[t]=[tsk,stime]\n", + " break\n", + " else:\n", + " t=len(tids)\n", + " tids[t]=[tsk,stime]\n", + " #print(f\"task {tsk} tid is {pid}.{t}\")\n", + " coretrack[pid]=tids\n", + "\n", + " if l['Event']=='SparkListenerTaskEnd':\n", + " sevt={}\n", + " eevt={}\n", + " hostid=exec_hosts[l['Host']]\n", + " pid=int(l['Executor ID'])*100+hostid\n", + " tsk=l['Task ID']\n", + " fintime=l['Finish Time']\n", + " \n", + " tids=coretrack[pid]\n", + " for t in tids.keys():\n", + " if tids[t][0]==tsk:\n", + " tids[t]=[-1,-1]\n", + " break\n", + " else:\n", + " ended_event.append(tsk)\n", + " continue\n", + " for ps in reversed([key for key in tids.keys()]) :\n", + " if tids[ps][1]-fintime<0 and tids[ps][1]-fintime>=-2:\n", + " fintime=tids[ps][1]\n", + " tids[t]=tids[ps]\n", + " tids[ps]=[-1,-1]\n", + " break\n", + " if starttime==0:\n", + " starttime=l['Launch Time']\n", + " print(f'applog start time: {starttime}')\n", + "\n", + " sstime=l['Launch Time']-starttime\n", + "\n", + " trace_events.append({\n", + " 'tid':pid+int(t),\n", + " 'ts':sstime,\n", + " 'dur':fintime-l['Launch Time'],\n", + " 'pid':pid,\n", + " \"ph\":'X',\n", + " 'name':\"stg{:d}\".format(l['Stage ID']),\n", + " 
'args':{\"job id\": l['Job ID'],\n", + " \"stage id\": l['Stage ID'],\n", + " \"tskid\":tsk,\n", + " \"input\":builtins.round(l[\"Bytes Read\"]/1024/1024,2),\n", + " \"spill\":builtins.round(l[\"Memory Bytes Spilled\"]/1024/1024,2),\n", + " \"Shuffle Read Metrics\": \"\",\n", + " \"|---Local Read\": builtins.round(l[\"Local Bytes Read\"]/1024/1024,2),\n", + " \"|---Remote Read\":builtins.round(l[\"Remote Bytes Read\"]/1024/1024,2),\n", + " \"Shuffle Write Metrics\": \"\",\n", + " \"|---Write\":builtins.round(l['Shuffle Bytes Written']/1024/1024,2)\n", + " }\n", + " })\n", + " tskmap[tsk]={'pid':pid,'tid':pid+int(t)}\n", + "\n", + " self.starttime=starttime\n", + " self.tskmap=tskmap\n", + " output=[json.dumps(l) for l in trace_events]\n", + " \n", + " df=self.df\n", + " \n", + " if showcpu and len(self.metricscollect)>0:\n", + " metricscollect=self.metricscollect\n", + " metrics_explode=df.where(\"Event='SparkListenerTaskEnd'\").withColumn(\"metrics\",F.explode(\"Accumulables\"))\n", + " m1092=metrics_explode.select(F.col(\"Executor ID\"),F.col(\"`Stage ID`\"),\"`Task ID`\",F.col(\"`Finish Time`\"),F.col(\"`Launch Time`\"),(F.col(\"`Finish Time`\")-F.col(\"`Launch Time`\")).alias(\"elapsedtime\"),\"metrics.*\").where(F.col(\"ID\").isin([l[0] for l in metricscollect]))\n", + " metric_name_df = spark.createDataFrame(metricscollect)\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_1\",\"ID\")\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_2\",\"unit\")\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_3\",\"mname\")\n", + "\n", + " met_df=m1092.join(metric_name_df,on=\"ID\")\n", + " met_df=met_df.withColumn(\"Update\",F.when(F.col(\"unit\")=='nsTiming',F.col(\"Update\")/1000000).otherwise(F.col(\"Update\")+0))\n", + " met_df=met_df.where(\"Update>1\")\n", + "\n", + " metdfx=met_df.groupBy(\"Task ID\",\"elapsedtime\").agg(F.sum(\"Update\").alias(\"totalCnt\"))\n", + " taskratio=metdfx.withColumn(\"ratio\",F.when(F.col(\"totalCnt\") 'time 
to collect batch' and mname <> 'time of scan'\")\n", + "\n", + " met_df=m1092.join(metric_name_df,on=\"ID\")\n", + " met_df=met_df.withColumn(\"Update\",F.when(F.col(\"unit\")=='nsTiming',F.col(\"Update\")/1000000).otherwise(F.col(\"Update\")+0))\n", + " \n", + " #pandas UDF doesn't work. hang\n", + " #tmbk=met_df.groupBy('Task ID').apply(time_breakdown)\n", + " \n", + " w=Window.partitionBy('Task ID')\n", + " met_df1=met_df.withColumn(\"sum_update\",F.sum(\"Update\").over(w))\n", + " met_df2=met_df1.withColumn(\"ratio\",(F.col(\"Finish Time\")-F.col(\"Launch Time\")-2)/F.col(\"sum_update\"))\n", + " met_df3=met_df2.withColumn(\"ratio\",F.when(F.col(\"ratio\")>1,1).otherwise(F.col(\"ratio\")))\n", + " met_df4=met_df3.withColumn(\"update_ratio\",F.floor(F.col(\"ratio\")*F.col(\"Update\")))\n", + " met_df5=met_df4.where(F.col(\"update_ratio\")>2)\n", + " w = (Window.partitionBy('Task ID').orderBy(F.desc(\"update_ratio\")).rowsBetween(Window.unboundedPreceding, Window.currentRow))\n", + " met_df6=met_df5.withColumn('ltime_dur', F.sum('update_ratio').over(w))\n", + " met_df8=met_df6.withColumn(\"ltime\",F.col(\"ltime_dur\")+F.col(\"Launch Time\")-F.col(\"update_ratio\"))\n", + "\n", + " tmbk=met_df8.withColumn(\"taskid\",F.col(\"Task ID\")).withColumn(\"start\",F.col(\"ltime\")+F.lit(1)).withColumn(\"dur\",F.col(\"update_ratio\")-F.lit(1)).withColumn(\"name\",F.col(\"mname\"))\n", + " \n", + " \n", + " traces.extend(tmbk.select(\n", + " F.lit(38).alias(\"tid\"),\n", + " (F.col(\"start\")-F.lit(self.starttime)).alias(\"ts\"),\n", + " (F.col(\"dur\")).alias(\"dur\"),\n", + " F.lit(pid).alias(\"pid\"),\n", + " F.lit(\"X\").alias(\"ph\"),\n", + " F.col(\"name\").alias(\"name\")).toJSON().collect())\n", + " traces.append(json.dumps({\n", + " \"name\": \"process_name\",\n", + " \"ph\": \"M\",\n", + " \"pid\":pid,\n", + " \"tid\":0,\n", + " \"args\":{\"name\":\"critical path\"}\n", + " }))\n", + " return traces \n", + " \n", + " def 
show_Stage_histogram(apps,stageid,bincount):\n", + " if apps.df is None:\n", + " apps.load_data()\n", + " \n", + " inputsize = apps.df.where(\"`Stage ID`={:d}\".format(stageid)).select(\"Stage ID\",\"Executor ID\", \"Task ID\", F.explode(\"Accumulables\")) \\\n", + " .select(\"Stage ID\",\"Executor ID\", \"Task ID\",\"col.*\") \\\n", + " .where(\"Name='input size in bytes' or Name='size of files read'\") \\\n", + " .groupBy(\"Task ID\") \\\n", + " .agg((F.sum(\"Update\")).alias(\"input read\"))\n", + "\n", + "\n", + " stage37=apps.df.where(\"`Stage ID`={:d} and event='SparkListenerTaskEnd'\".format(stageid) )\\\n", + " .join(inputsize,on=[\"Task ID\"],how=\"left\")\\\n", + " .fillna(0) \\\n", + " .select(F.col('Host'), \n", + " F.round((F.col('Finish Time')/1000-F.col('Launch Time')/1000),2).alias('elapsedtime'),\n", + " F.round((F.col('`input read`')+F.col('`Bytes Read`')+F.col('`Local Bytes Read`')+F.col('`Remote Bytes Read`'))/1024/1024,2).alias('input'))\n", + " stage37=stage37.cache()\n", + " hist_elapsedtime=stage37.select('elapsedtime').rdd.flatMap(lambda x: x).histogram(15)\n", + " hist_input=stage37.select('input').rdd.flatMap(lambda x: x).histogram(15)\n", + " fig, axs = plt.subplots(figsize=(30, 5),nrows=1, ncols=2)\n", + " ax=axs[0]\n", + " binSides, binCounts = hist_elapsedtime\n", + " binSides=[builtins.round(l,2) for l in binSides]\n", + "\n", + " N = len(binCounts)\n", + " ind = numpy.arange(N)\n", + " width = 0.5\n", + "\n", + " rects1 = ax.bar(ind+0.5, binCounts, width, color='b')\n", + "\n", + " ax.set_ylabel('Frequencies')\n", + " ax.set_title('stage{:d} elapsed time breakdown'.format(stageid))\n", + " ax.set_xticks(numpy.arange(N+1))\n", + " ax.set_xticklabels(binSides)\n", + "\n", + " ax=axs[1]\n", + " binSides, binCounts = hist_input\n", + " binSides=[builtins.round(l,2) for l in binSides]\n", + "\n", + " N = len(binCounts)\n", + " ind = numpy.arange(N)\n", + " width = 0.5\n", + " rects1 = ax.bar(ind+0.5, binCounts, width, color='b')\n", + 
"\n", + " ax.set_ylabel('Frequencies')\n", + " ax.set_title('stage{:d} input data breakdown'.format(stageid))\n", + " ax.set_xticks(numpy.arange(N+1))\n", + " ax.set_xticklabels(binSides)\n", + "\n", + " out=stage37\n", + " outpds=out.toPandas()\n", + "\n", + " fig, axs = plt.subplots(nrows=1, ncols=3, sharey=False,figsize=(30,8),gridspec_kw = {'width_ratios':[1, 1, 1]})\n", + " plt.subplots_adjust(wspace=0.01)\n", + "\n", + " groups= outpds.groupby('Host')\n", + " for name, group in groups:\n", + " axs[0].plot(group.input, group.elapsedtime, marker='o', linestyle='', ms=5, label=name)\n", + " axs[0].set_xlabel('input size (MB)')\n", + " axs[0].set_ylabel('elapsed time (s)')\n", + "\n", + " axs[0].legend()\n", + "\n", + " axs[0].get_shared_y_axes().join(axs[0], axs[1])\n", + "\n", + " sns.violinplot(y='elapsedtime', x='Host', data=outpds,palette=['g'],ax=axs[1])\n", + "\n", + " sns.violinplot(y='input', x='Host', data=outpds,palette=['g'],ax=axs[2])\n", + "\n", + " #ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(''))\n", + " #ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(''))\n", + "\n", + " if False:\n", + " out=stage37\n", + " vecAssembler = VectorAssembler(inputCols=[\"input\",'elapsedtime'], outputCol=\"features\").setHandleInvalid(\"skip\")\n", + " new_df = vecAssembler.transform(out)\n", + " kmeans = KMeans(k=2, seed=1) # 2 clusters here\n", + " model = kmeans.fit(new_df.select('features'))\n", + " transformed = model.transform(new_df)\n", + "\n", + "\n", + " outpds=transformed.select('Host','elapsedtime','input','prediction').toPandas()\n", + "\n", + " fig, axs = plt.subplots(nrows=1, ncols=2, sharey=False,figsize=(30,8),gridspec_kw = {'width_ratios':[1, 1]})\n", + " plt.subplots_adjust(wspace=0.01)\n", + "\n", + " groups= outpds.groupby('prediction')\n", + " for name, group in groups:\n", + " axs[0].plot(group.input, group.elapsedtime, marker='o', linestyle='', ms=5, label=name)\n", + " axs[0].legend()\n", + "\n", + " 
bars=transformed.where('prediction=1').groupBy(\"Host\").count().toPandas()\n", + "\n", + " axs[1].bar(bars['Host'], bars['count'], 0.4, color='coral')\n", + " axs[1].set_title('cluster=1')\n", + "\n", + " plt.show()\n", + " \n", + " def show_Stages_hist(apps,**kwargs):\n", + " if apps.df is None:\n", + " apps.load_data()\n", + " \n", + " bincount=kwargs.get(\"bincount\",15)\n", + " threshold=kwargs.get(\"threshold\",0.9)\n", + " \n", + " query=kwargs.get(\"queryid\",None)\n", + " if query and type(query)==int:\n", + " query = [query,]\n", + " df=apps.df.where(F.col(\"real_queryid\").isin(query)) if query else apps.df\n", + " \n", + " totaltime=df.where(\"event='SparkListenerTaskEnd'\" ).agg(F.sum(F.col('Finish Time')-F.col('Launch Time')).alias('total_time')).collect()[0]['total_time']\n", + " stage_time=df.where(\"event='SparkListenerTaskEnd'\" ).groupBy('`Stage ID`').agg(F.sum(F.col('Finish Time')-F.col('Launch Time')).alias('total_time')).orderBy('total_time', ascending=False).toPandas()\n", + " stage_time['acc_total'] = stage_time['total_time'].cumsum()/totaltime\n", + " stage_time=stage_time.reset_index()\n", + " fig, ax = plt.subplots(figsize=(30, 5))\n", + "\n", + " rects1 = ax.plot(stage_time['index'],stage_time['acc_total'],'b.-')\n", + " ax.set_xticks(stage_time['index'])\n", + " ax.set_xticklabels(stage_time['Stage ID'])\n", + " ax.set_xlabel('stage')\n", + " ax.grid(which='major', axis='x')\n", + " plt.show()\n", + " shownstage=[]\n", + " for x in stage_time.index:\n", + " if stage_time['acc_total'][x]<=threshold:\n", + " shownstage.append(stage_time['Stage ID'][x])\n", + " else:\n", + " shownstage.append(stage_time['Stage ID'][x])\n", + " break\n", + " for row in shownstage:\n", + " apps.show_Stage_histogram(row,bincount) \n", + " \n", + " def get_hottest_stages(apps,**kwargs):\n", + " if apps.df is None:\n", + " apps.load_data()\n", + " \n", + " bincount=kwargs.get(\"bincount\",15)\n", + " threshold=kwargs.get(\"threshold\",0.9)\n", + " 
plot=kwargs.get(\"plot\",True)\n", + " \n", + " query=kwargs.get(\"queryid\",None)\n", + " if query and type(query)==int:\n", + " query = [query,]\n", + " df=apps.df.where(F.col(\"real_queryid\").isin(query)) if query else apps.df.where(\"queryid is not NULL\")\n", + "\n", + " stage_time=df.where(\"event='SparkListenerTaskEnd'\" ).groupBy('`Stage ID`','Job ID','real_queryid').agg(\n", + " F.sum(F.col('Finish Time')-F.col('Launch Time')).alias('total_time'),\n", + " F.stddev(F.col('Finish Time')/1000-F.col('Launch Time')/1000).alias('stdev_time'),\n", + " F.count(\"*\").alias(\"cnt\"),\n", + " F.first('queryid').astype(IntegerType()).alias('queryid')\n", + " )\\\n", + " .select('`Stage ID`','Job ID','real_queryid','queryid',\n", + " (F.col(\"total_time\")/1000/(F.when(F.col(\"cnt\")>F.lit(apps.executor_instances*apps.executor_cores/apps.taskcpus),F.lit(apps.executor_instances*apps.executor_cores/apps.taskcpus)).otherwise(F.col(\"cnt\")))).alias(\"total_time\"),\n", + " F.col(\"stdev_time\")\n", + " ).orderBy('total_time', ascending=False).toPandas()\n", + "\n", + " totaltime=stage_time['total_time'].sum()\n", + " stage_time['acc_total'] = stage_time['total_time'].cumsum()/totaltime\n", + " stage_time['total'] = stage_time['total_time']/totaltime\n", + " stage_time=stage_time.reset_index()\n", + "\n", + " shownstage=stage_time.loc[stage_time['acc_total'] <=threshold]\n", + " shownstage['stg']=shownstage['real_queryid'].astype(str)+'_'+shownstage['Job ID'].astype(str)+'_'+shownstage['Stage ID'].astype(str)\n", + " if plot:\n", + " shownstage.plot.bar(x=\"stg\",y=\"total\",figsize=(30,8))\n", + "\n", + "\n", + "\n", + " norm = matplotlib.colors.Normalize(vmin=0, vmax=max(stage_time.queryid))\n", + " cmap = matplotlib.cm.get_cmap('brg')\n", + " def setbkcolor(x):\n", + " rgba=cmap(norm(x['queryid']))\n", + " return ['background-color:rgba({:d},{:d},{:d},1); color:white'.format(int(rgba[0]*255),int(rgba[1]*255),int(rgba[2]*255))]*9\n", + "\n", + " if plot:\n", + " 
display(stage_time.style.apply(setbkcolor,axis=1).format({\"total_time\":lambda x: '{:,.2f}'.format(x),\"acc_total\":lambda x: '{:,.2%}'.format(x),\"total\":lambda x: '{:,.2%}'.format(x)}))\n", + " \n", + " return stage_time\n", + "\n", + " def scatter_elapsetime_input(apps,stageid):\n", + " if apps.df is None:\n", + " apps.load_data()\n", + " stage37=apps.df.where(\"`Stage ID`={:d} and event='SparkListenerTaskEnd'\".format(stageid) ).select(F.round((F.col('Finish Time')/1000-F.col('Launch Time')/1000),2).alias('elapsedtime'),F.round((F.col('`Bytes Read`')+F.col('`Local Bytes Read`')+F.col('`Remote Bytes Read`'))/1024/1024,2).alias('input')).toPandas()\n", + " stage37.plot.scatter('input','elapsedtime',figsize=(30, 5))\n", + "\n", + " def get_critical_path_stages(self): \n", + " df=self.df.where(\"Event='SparkListenerTaskEnd'\")\n", + " criticaltasks=self.criticaltasks\n", + " cripds=pandas.DataFrame(criticaltasks)\n", + " cripds.columns=['task_id',\"launch\",\"finish\"]\n", + " cridf=spark.createDataFrame(cripds)\n", + " df_ctsk=df.join(cridf,on=[F.col(\"task_id\")==F.col(\"Task ID\")],how=\"inner\")\n", + " df_ctsk=df_ctsk.withColumn(\"elapsed\",(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000)\n", + " return df_ctsk.where(\"elapsed>10\").orderBy(F.desc(\"elapsed\")).select(\"real_queryid\",F.round(\"elapsed\",2).alias(\"elapsed\"),\"Host\",\"executor ID\",\"Stage ID\",\"Task ID\",F.round(F.col(\"Bytes Read\")/1000000,0).alias(\"file read\"),F.round((F.col(\"Local Bytes Read\")+F.col(\"Remote Bytes Read\"))/1000000,0).alias(\"shuffle read\")).toPandas()\n", + " \n", + " def show_time_metric(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + " shownodes=kwargs.get(\"shownodes\",None)\n", + " query=kwargs.get(\"queryid\",None)\n", + " plot=kwargs.get(\"plot\",True)\n", + " taskids=kwargs.get(\"taskids\",None)\n", + " \n", + " if query and type(query)==int:\n", + " query = [query,]\n", + " \n", + " 
showexecutor=kwargs.get(\"showexecutor\",True) if not taskids else False\n", + " queryid = query[0] if query else 0\n", + " \n", + " df=self.df.where(F.col(\"Host\").isin(shownodes)) if shownodes else self.df\n", + " df=df.where(F.col(\"real_queryid\").isin(query)) if query else df.where(\"queryid is not NULL\")\n", + "\n", + " df=df.where(F.col(\"Task ID\").isin(taskids)) if taskids else df\n", + "\n", + " exec_cores=1 if taskids else self.executor_cores\n", + " execs=1 if taskids else self.executor_instances\n", + "\n", + " metricscollect=self.metricscollect\n", + "\n", + " metrics_explode=df.where(\"Event='SparkListenerTaskEnd'\").withColumn(\"metrics\",F.explode(\"Accumulables\"))\n", + " m1092=metrics_explode.select(F.col(\"Executor ID\"),F.col(\"`Stage ID`\"),\"`Task ID`\",F.col(\"`Finish Time`\"),F.col(\"`Launch Time`\"),(F.col(\"`Finish Time`\")-F.col(\"`Launch Time`\")).alias(\"elapsedtime\"),\"metrics.*\").where(F.col(\"ID\").isin([l[0] for l in metricscollect]))\n", + " metric_name_df = spark.createDataFrame(metricscollect)\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_1\",\"ID\")\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_2\",\"unit\")\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_3\",\"mname\")\n", + " metric_name_df=metric_name_df.withColumnRenamed(\"_4\",\"node\")\n", + "\n", + " runtime=metrics_explode.agg(F.round(F.max(\"Finish Time\")/1000-F.min(\"Launch Time\")/1000,2).alias(\"runtime\")).collect()[0][\"runtime\"]\n", + "\n", + " met_df=m1092.join(metric_name_df,on=\"ID\")\n", + " met_df=met_df.withColumn(\"Update\",F.when(F.col(\"unit\")=='nsTiming',F.col(\"Update\")/1000000).otherwise(F.col(\"Update\")+0))\n", + " outpdf=met_df.groupBy(\"`Executor ID`\",\"mname\").sum(\"Update\").orderBy(\"Executor ID\").toPandas()\n", + "\n", + " met_time_cnt=df.where(\"Event='SparkListenerTaskEnd'\")\n", + " exectime=met_time_cnt.groupBy(\"Executor ID\").agg((F.max(\"Finish Time\")-F.min(\"Launch 
Time\")).alias(\"totaltime\"),F.sum(F.col(\"`Finish Time`\")-F.col(\"`Launch Time`\")).alias(\"tasktime\"))\n", + "\n", + " totaltime_query=met_time_cnt.groupBy(\"real_queryid\").agg((F.max(\"Finish Time\")-F.min(\"Launch Time\")).alias(\"totaltime\")).agg(F.sum(\"totaltime\").alias(\"totaltime\")).collect()\n", + " totaltime_query=totaltime_query[0][\"totaltime\"]\n", + " \n", + " pdf=exectime.toPandas()\n", + " exeids=set(outpdf['Executor ID'])\n", + " outpdfs=[outpdf[outpdf[\"Executor ID\"]==l] for l in exeids]\n", + " tasktime=pdf.set_index(\"Executor ID\").to_dict()['tasktime']\n", + "\n", + " def comb(l,r):\n", + " execid=list(r['Executor ID'])[0]\n", + " lp=r[['mname','sum(Update)']]\n", + " lp.columns=[\"mname\",\"val_\"+execid]\n", + " idle=totaltime_query*exec_cores-tasktime[execid]\n", + " nocount=tasktime[execid]-sum(lp[\"val_\"+execid])\n", + " if idle<0:\n", + " idle=0\n", + " if nocount<0:\n", + " nocount=0\n", + " lp=lp.append([{\"mname\":\"idle\",\"val_\"+execid:idle}])\n", + " lp=lp.append([{\"mname\":\"not_counted\",\"val_\"+execid:nocount}])\n", + " if l is not None:\n", + " return pandas.merge(lp, l,on=[\"mname\"],how='outer')\n", + " else:\n", + " return lp\n", + "\n", + " rstpdf=None\n", + " for l in outpdfs[0:]:\n", + " rstpdf=comb(rstpdf,l)\n", + " \n", + " for l in [l for l in rstpdf.columns if l!=\"mname\"]:\n", + " rstpdf[l]=rstpdf[l]/1000/exec_cores\n", + " \n", + " rstpdf=rstpdf.sort_values(by=\"val_\"+list(exeids)[0],axis=0,ascending=False)\n", + " if showexecutor and plot:\n", + " rstpdf.set_index(\"mname\").T.plot.bar(stacked=True,figsize=(30,8))\n", + " pdf_sum=pandas.DataFrame(rstpdf.set_index(\"mname\").T.sum())\n", + " totaltime=totaltime_query/1000\n", + " pdf_sum[0]=pdf_sum[0]/(execs)\n", + " pdf_sum[0][\"idle\"]=(totaltime_query-sum(tasktime.values())/execs/exec_cores)/1000\n", + " pdf_sum=pdf_sum.sort_values(by=0,axis=0,ascending=False)\n", + " pdf_sum=pdf_sum.T\n", + " 
pdf_sum.columns=[\"{:>2.0f}%_{:s}\".format(pdf_sum[l][0]/totaltime*100,l) for l in pdf_sum.columns]\n", + " matplotlib.rcParams['font.sans-serif'] = \"monospace\"\n", + " matplotlib.rcParams['font.family'] = \"monospace\"\n", + " import matplotlib.font_manager as font_manager\n", + " if plot:\n", + " ax=pdf_sum.plot.bar(stacked=True,figsize=(30,8))\n", + " font = font_manager.FontProperties(family='monospace',\n", + " style='normal', size=14)\n", + " ax.legend(prop=font,loc=4)\n", + " plt.title(\"{:s} q{:d} executors={:d} cores_per_executor={:d} parallelism={:d} sumtime={:.0f} runtime={:.0f}\".format(self.file.split(\"/\")[2],queryid,self.executor_instances,self.executor_cores,self.parallelism,totaltime,runtime),fontdict={'fontsize':24})\n", + " return pdf_sum\n", + "\n", + " def show_critical_path_time_breakdown(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + " return self.show_time_metric(taskids=[l[0].item() for l in self.criticaltasks])\n", + " \n", + " def get_spark_config(self):\n", + " df=spark.read.json(self.file)\n", + " self.appid=df.where(\"`App ID` is not null\").collect()[0][\"App ID\"]\n", + " pandas.set_option('display.max_rows', None)\n", + " pandas.set_option('display.max_columns', None)\n", + " pandas.set_option('display.max_colwidth', 100000)\n", + " return df.select(\"Properties.*\").where(\"`spark.app.id` is not null\").limit(1).toPandas().T\n", + " \n", + " def get_app_name(self):\n", + " cfg=self.get_spark_config()\n", + " display(HTML(\"\" + cfg.loc[cfg.index=='spark.app.name'][0][0]+\"\"))\n", + " \n", + " \n", + " def get_query_time(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + " queryid=kwargs.get(\"queryid\",None)\n", + " showtable=kwargs.get(\"showtable\",True)\n", + " plot=kwargs.get(\"plot\",True)\n", + " \n", + " if queryid and type(queryid)==int:\n", + " queryid = [queryid,]\n", + " \n", + " df=self.df.where(F.col(\"real_queryid\").isin(queryid)) if queryid else 
self.df.where(\"queryid is not NULL\")\n", + " \n", + " \n", + " stages=df.select(\"real_queryid\",\"Stage ID\").distinct().orderBy(\"Stage ID\").groupBy(\"real_queryid\").agg(F.collect_list(\"Stage ID\").alias(\"stages\")).orderBy(\"real_queryid\")\n", + " runtimeacc=df.where(\"Event='SparkListenerTaskEnd'\") \\\n", + " .groupBy(\"real_queryid\") \\\n", + " .agg(F.round(F.sum(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000/self.executor_instances/self.executor_cores*self.taskcpus,2).alias(\"acc_task_time\"))\n", + " inputsize = df.select(\"real_queryid\",\"Stage ID\",\"Executor ID\", \"Task ID\", F.explode(\"Accumulables\")) \\\n", + " .select(\"real_queryid\",\"Stage ID\",\"Executor ID\", \"Task ID\",\"col.*\") \\\n", + " .where(\"Name='input size in bytes' or Name='size of files read'\") \\\n", + " .groupBy(\"real_queryid\") \\\n", + " .agg(F.round(F.sum(\"Update\")/1024/1024/1024,2).alias(\"input read\")).orderBy(\"real_queryid\")\n", + " if self.dfacc is not None:\n", + " inputsizev1 = self.dfacc.where(\"Name='size of files read'\").groupBy(\"real_queryid\").agg(F.round(F.sum(\"Update\")/1024/1024/1024,2).alias(\"input read v1\")).orderBy(\"real_queryid\")\n", + " inputsize=inputsize.join(inputsizev1,on=\"real_queryid\",how=\"outer\")\n", + " inputsize=inputsize.withColumn(\"input read\",F.coalesce(F.col(\"input read\"),F.col(\"input read v1\"))).drop(\"input read v1\")\n", + " \n", + " outputrows = df.select(\"real_queryid\",\"Stage ID\",\"Stage ID\",F.explode(\"Accumulables\"))\\\n", + " .select(\"real_queryid\",\"Stage ID\",\"Stage ID\",\"col.*\")\\\n", + " .where(\"Name='number of output rows'\")\\\n", + " .groupBy(\"real_queryid\")\\\n", + " .agg(F.round(F.sum(\"Update\")/1000000000,2).alias(\"output rows\"))\n", + " \n", + " stages=runtimeacc.join(stages,on=\"real_queryid\",how=\"left\")\n", + " stages=inputsize.join(stages,on=\"real_queryid\",how=\"left\")\n", + " stages=stages.join(outputrows,on='real_queryid',how=\"left\")\n", + " \n", + " 
out=df.groupBy(\"real_queryid\").agg(\n", + " F.round(F.max(\"query_endtime\")/1000-F.min(\"query_starttime\")/1000,2).alias(\"runtime\"),\n", + " F.round(F.sum(\"Disk Bytes Spilled\")/1024/1024/1024,2).alias(\"disk spilled\"),\n", + " F.round(F.sum(\"Memory Bytes Spilled\")/1024/1024/1024,2).alias(\"memspilled\"),\n", + " F.round(F.sum(\"Local Bytes Read\")/1024/1024/1024,2).alias(\"local_read\"),\n", + " F.round(F.sum(\"Remote Bytes Read\")/1024/1024/1024,2).alias(\"remote_read\"),\n", + " F.round(F.sum(\"Shuffle Bytes Written\")/1024/1024/1024,2).alias(\"shuffle_write\"),\n", + " F.round(F.sum(\"Executor Deserialize Time\")/1000/self.parallelism,2).alias(\"deser_time\"),\n", + " F.round(F.sum(\"Executor Run Time\")/1000/self.parallelism,2).alias(\"run_time\"),\n", + " F.round(F.sum(\"Result Serialization Time\")/1000/self.parallelism,2).alias(\"ser_time\"),\n", + " F.round(F.sum(\"Fetch Wait Time\")/1000/self.parallelism,2).alias(\"f_wait_time\"),\n", + " F.round(F.sum(\"JVM GC Time\")/1000/self.parallelism,2).alias(\"gc_time\"),\n", + " F.round(F.max(\"Peak Execution Memory\")/1000000000*self.executor_instances*self.executor_cores,2).alias(\"peak_mem\"),\n", + " F.max(\"queryid\").alias(\"queryid\")\n", + " ).join(stages,\"real_queryid\",how=\"left\").orderBy(\"real_queryid\").toPandas().set_index(\"real_queryid\")\n", + " out[\"executors\"]=self.executor_instances\n", + " out[\"core/exec\"]=self.executor_cores\n", + " out[\"task.cpus\"]=self.taskcpus\n", + " out['parallelism']=self.parallelism\n", + " \n", + " if not showtable:\n", + " return out\n", + "\n", + " def highlight_greater(x):\n", + " m1 = x['acc_task_time'] / x['runtime'] * 100\n", + " m2 = x['run_time'] / x['runtime'] * 100\n", + " m3 = x['f_wait_time'] / x['runtime'] * 100\n", + " \n", + "\n", + " df1 = pandas.DataFrame('', index=x.index, columns=x.columns)\n", + "\n", + " df1['acc_task_time'] = m1.apply(lambda x: 'background-image: linear-gradient(to right,#5fba7d {:f}%,white 
{:f}%)'.format(x,x))\n", + " df1['run_time'] = m2.apply(lambda x: 'background-image: linear-gradient(to right,#5fba7d {:f}%,white {:f}%)'.format(x,x))\n", + " df1['f_wait_time'] = m3.apply(lambda x: 'background-image: linear-gradient(to right,#d65f5f {:f}%,white {:f}%)'.format(x,x))\n", + " return df1\n", + "\n", + "\n", + " cm = sns.light_palette(\"green\", as_cmap=True)\n", + " if plot:\n", + " display(out.style.apply(highlight_greater, axis=None).background_gradient(cmap=cm,subset=['input read', 'shuffle_write']))\n", + " \n", + " return out\n", + " \n", + " def get_query_time_metric(self):\n", + " if self.df is None:\n", + " self.load_data()\n", + " querids=self.df.select(\"queryid\").distinct().collect()\n", + " for idx,q in enumerate([l[\"queryid\"] for l in querids]):\n", + " self.show_time_metric(query=[q,],showexecutor=False)\n", + " \n", + " def getOperatorCount(self):\n", + " if self.df is None:\n", + " self.load_data()\n", + " df=spark.read.json(self.file)\n", + " queryids=self.df.select(F.col(\"queryid\").astype(LongType()),F.col(\"real_queryid\")).distinct().orderBy(\"real_queryid\")\n", + " queryplans=self.queryplans.collect()\n", + " list_queryid=[l.real_queryid for l in queryids.collect()]\n", + "\n", + " def get_child(execid,node):\n", + " #wholestagetransformer not counted\n", + " if node['nodeName'] is not None and not node['nodeName'].startswith(\"WholeStageCodegenTransformer\"):\n", + " if node[\"nodeName\"] not in qps:\n", + " qps[node[\"nodeName\"]]={l:0 for l in list_queryid}\n", + " qps[node[\"nodeName\"]][execid]=qps[node[\"nodeName\"]][execid]+1\n", + " if node[\"children\"] is not None:\n", + " for c in node[\"children\"]:\n", + " get_child(execid,c)\n", + "\n", + " qps={}\n", + " for c in queryplans:\n", + " get_child(c['real_queryid'],c)\n", + "\n", + " return pandas.DataFrame(qps).T.sort_index(axis=0) \n", + " \n", + " def get_query_plan(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + "\n", + " 
queryid=kwargs.get(\"queryid\",None)\n", + " stageid=kwargs.get(\"stageid\",None)\n", + " \n", + " outputstage=kwargs.get(\"outputstage\",None)\n", + " \n", + " show_plan_only=kwargs.get(\"show_plan_only\",False)\n", + " show_simple_string=kwargs.get(\"show_simple_string\",False)\n", + "\n", + " plot=kwargs.get(\"plot\",True)\n", + " \n", + " colors=[\"#{:02x}{:02x}{:02x}\".format(int(l[0]*255),int(l[1]*255),int(l[2]*255)) for l in matplotlib.cm.get_cmap('tab20').colors]\n", + " \n", + " if queryid is not None:\n", + " if type(queryid)==int or type(queryid)==str:\n", + " queryid = [queryid,]\n", + " shown_stageid = [l[\"Stage ID\"] for l in self.df.where(F.col(\"real_queryid\").isin(queryid)).select(\"Stage ID\").distinct().collect()]\n", + " if stageid is not None:\n", + " if type(stageid)==int:\n", + " shown_stageid = [stageid,]\n", + " elif type(stageid)==list:\n", + " shown_stageid = stageid\n", + " queryid = [l[\"real_queryid\"] for l in self.df.where(F.col(\"`Stage ID`\").isin(shown_stageid)).select(\"real_queryid\").limit(1).collect()]\n", + "\n", + "\n", + " queryplans=[]\n", + " queryplans = self.queryplans.where(F.col(\"real_queryid\").isin(queryid)).orderBy(\"real_queryid\").collect() if queryid else self.queryplans.orderBy(\"real_queryid\").collect()\n", + " dfmetric=self.df.where(\"Event='SparkListenerTaskEnd'\").select(\"queryid\",\"real_queryid\",\"Stage ID\",\"Job ID\",F.explode(\"Accumulables\").alias(\"metric\")).select(\"*\",\"metric.*\").select(\"Stage ID\",\"ID\",\"Update\").groupBy(\"ID\",\"Stage ID\").agg(F.round(F.sum(\"Update\"),1).alias(\"value\"),F.round(F.stddev(\"Update\"),1).alias(\"stdev\")).collect()\n", + " accid2stageid={l.ID:(l[\"Stage ID\"],l[\"value\"],l[\"stdev\"]) for l in dfmetric}\n", + "\n", + " stagetime=self.df.where((F.col(\"real_queryid\").isin(queryid))).where(F.col(\"Event\")=='SparkListenerTaskEnd').groupBy(\"Stage ID\").agg(\n", + " F.round(F.sum(F.col(\"Finish Time\")-F.col(\"Launch 
Time\"))/1000/self.executor_instances/self.executor_cores*self.taskcpus,1).alias(\"elapsed time\"),\n", + " F.round(F.stddev(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000,1).alias(\"time stdev\"),\n", + " F.count(F.col(\"Task ID\")).alias(\"partitions\")\n", + " ).orderBy(F.desc(\"elapsed time\")).collect()\n", + "\n", + " apptotaltime=reduce(lambda x,y: x+y['elapsed time'], stagetime,0)\n", + " if apptotaltime==0:\n", + " display(HTML(\"Error, totaltime is 0 \"))\n", + " apptotaltime=1\n", + " return \"\"\n", + "\n", + " stagemap={l[\"Stage ID\"]:l[\"elapsed time\"] for l in stagetime}\n", + " stage_time_stdev_map={l[\"Stage ID\"]:l[\"time stdev\"] for l in stagetime}\n", + " stagepartmap={l[\"Stage ID\"]:l[\"partitions\"] for l in stagetime}\n", + "\n", + " keystage=[]\n", + " keystagetime=[]\n", + " subtotal=0\n", + " for s in stagetime:\n", + " subtotal=subtotal+s['elapsed time']\n", + " keystage.append(s['Stage ID'])\n", + " keystagetime.append(s['elapsed time'])\n", + " if subtotal/apptotaltime>0.9:\n", + " break\n", + " keystagetime=[\"{:02x}{:02x}\".format(int(255*l/keystagetime[0]),255-int(255*l/keystagetime[0])) for l in keystagetime if keystagetime[0]>0]\n", + " keystagemap=dict(zip(keystage,keystagetime))\n", + " outstr=[]\n", + " def print_plan(real_queryid,level,node,parent_stageid):\n", + " stageid = accid2stageid[int(node[\"metrics\"][0][\"accumulatorId\"])][0] if node[\"metrics\"] is not None and len(node[\"metrics\"])>0 and node[\"metrics\"][0][\"accumulatorId\"] in accid2stageid else parent_stageid\n", + "\n", + " if stageid in shown_stageid:\n", + " fontcolor=f\"color:#{keystagemap[stageid]}00;font-weight:bold\" if stageid in keystagemap else \"color:#000000\"\n", + " stagetime=0 if stageid not in stagemap else stagemap[stageid]\n", + " stageParts=0 if stageid not in stagepartmap else stagepartmap[stageid]\n", + "\n", + " input_rowcntstr=\"\"\n", + " output_rowcntstr=\"\"\n", + " timename={}\n", + " input_columnarbatch=\"\"\n", + " 
output_columnarbatch=\"\"\n", + " output_row_batch=\"\"\n", + " other_metric_name={}\n", + "\n", + " outputrows=0\n", + " outputbatches=0\n", + " if node[\"metrics\"] is not None:\n", + " for m in node[\"metrics\"]:\n", + "\n", + " if m[\"accumulatorId\"] not in accid2stageid:\n", + " continue\n", + " \n", + " if m[\"name\"].endswith(\"block wall nanos\") or m['name'].endswith(\"cpu nanos\"):\n", + " continue\n", + " \n", + " \n", + " value=accid2stageid[m[\"accumulatorId\"]][1]\n", + " stdev_value=accid2stageid[m[\"accumulatorId\"]][2]\n", + " stdev_value=0 if stdev_value is None else stdev_value\n", + " if m[\"metricType\"] in ['nsTiming','timing']:\n", + " totaltime=value/1000 if m[\"metricType\"] == 'timing' else value/1000000000\n", + " stdev_value=stdev_value/1000 if m[\"metricType\"] == 'timing' else stdev_value/1000000000\n", + " \n", + " timeratio= 0 if stagetime==0 else totaltime/self.executor_instances/self.executor_cores*self.taskcpus/stagetime*100\n", + " timeratio_query = totaltime/self.executor_instances/self.executor_cores*self.taskcpus/apptotaltime*100\n", + " if timeratio > 10 or timeratio_query>10:\n", + " timename[m[\"name\"]]=\"{:.2f}s ({:.1f}%, {:.1f}%, {:.2f})\".format(totaltime,timeratio, totaltime/self.executor_instances/self.executor_cores*self.taskcpus/apptotaltime*100,stdev_value)\n", + " else:\n", + " timename[m[\"name\"]]=\"{:.2f}s ({:.1f}%, {:.1f}%, {:.2f})\".format(totaltime,timeratio, totaltime/self.executor_instances/self.executor_cores*self.taskcpus/apptotaltime*100,stdev_value)\n", + " elif m[\"name\"] in [\"number of output rows\",\"number of final output rows\"]:\n", + " output_rowcntstr=\"{:,.1f}\".format(value/1000/1000)+\" M\"\n", + " outputrows=value\n", + " elif m[\"name\"] in [\"number of output columnar batches\",\"number of output batches\",\"output_batches\", \"number of output vectors\",\"number of final output vectors\", \"records read\"]: \n", + " # records reads is the output of shuffle\n", + " 
output_columnarbatch=\"{:,d}\".format(int(value))\n", + " outputbatches=value\n", + " elif m[\"name\"]==\"number of input rows\":\n", + " input_rowcntstr=\"{:,.1f}\".format(value/1000/1000)+\" M\"\n", + " elif m[\"name\"] in [\"number of input batches\",\"input_batches\",\"number of input vectors\"]:\n", + " input_columnarbatch=\"{:,d}\".format(int(value))\n", + " else:\n", + " if value>1000000000:\n", + " other_metric_name[m[\"name\"]]=\"{:,.1f} G ({:,.1f})\".format(value/1000000000,stdev_value/1000000000)\n", + " elif value>1000000:\n", + " other_metric_name[m[\"name\"]]=\"{:,.1f} M ({:,.1f})\".format(value/1000000,stdev_value/1000000)\n", + " elif value>1000:\n", + " other_metric_name[m[\"name\"]]=\"{:,.1f} K ({:,.1f})\".format(value/1000,stdev_value/1000)\n", + " else:\n", + " other_metric_name[m[\"name\"]]=\"{:,d} ({:,.1f})\".format(int(value),stdev_value)\n", + "\n", + "\n", + " if outputrows>0 and outputbatches>0:\n", + " output_row_batch=\"{:,d}\".format(int(outputrows/outputbatches))\n", + "\n", + "\n", + " fontcolor=f\"color:#{keystagemap[stageid]}00;font-weight:bold\" if stageid in keystage else \"color:#000000\"\n", + " stagetime=0 if stageid not in stagemap else stagemap[stageid]\n", + " stage_time_stdev=0 if stageid not in stage_time_stdev_map else stage_time_stdev_map[stageid]\n", + " \n", + " nodenamestr=node[\"nodeName\"]\n", + " if nodenamestr is None:\n", + " nodenamestr=\"\"\n", + " if nodenamestr in ['ColumnarToRow','RowToArrowColumnar','ArrowColumnarToRow','ArrowRowToColumnarExec','GlutenColumnarToRowExec','GlutenRowToArrowColumnar']:\n", + " nodename=''+nodenamestr+''\n", + " else:\n", + " nodename=nodenamestr\n", + " if outputstage is not None:\n", + " outputstage.append({\"queryid\":real_queryid,\"stageid\":stageid,\"stagetime\":stagetime,\"stageParts\":stageParts,\"nodename\":nodenamestr,\"output_rowcnt\":outputrows,\"nodename_level\":\" \".join([\"|_\" for l in range(0,level)]) + \" \" + nodenamestr})\n", + " if not show_plan_only:\n", + 
" nodestr= \" \".join([\"|_\" for l in range(0,level)]) + \" \" + nodename\n", + " if show_simple_string :\n", + " simstr=node['simpleString']\n", + " nodestr = nodestr + \"
\\n\" + simstr \n", + " \n", + " timenametable='\\n'\n", + " \n", + " timenameSort=list(timename)\n", + " \n", + " for nameidx in sorted(timename):\n", + " timenametable+=f\"\"\n", + " timenametable+=\"
{nameidx}{timename[nameidx]}
\\n\"\n", + " \n", + " \n", + " othertable='\\n'\n", + " for nameidx in sorted(other_metric_name):\n", + " othertable+=f\"\"\n", + " othertable+=\"
{nameidx}{other_metric_name[nameidx]}
\\n\"\n", + " \n", + " outstr.append(f\"{stageid}\"+\n", + " f\" {stagetime}({stage_time_stdev}) \"+\n", + " f\" {stageParts} \"+\n", + " f\"\" + nodestr + f\"\"+\n", + " f\" {input_rowcntstr} \"+\n", + " f\" {input_columnarbatch} \"+\n", + " f\" {output_rowcntstr} \"+\n", + " f\" {output_columnarbatch} \"+\n", + " f\" {output_row_batch} \"+\n", + " f\" {timenametable} \"+\n", + " f\" {othertable} \"+\n", + " \"\")\n", + " else:\n", + " outstr.append(f\"{stageid}\"+\n", + " f\" {stagetime} \"+\n", + " f\" {stageParts} \"+\n", + " f\"\" + \" \".join([\"|_\" for l in range(0,level)]) + \" \" + nodename + f\"\"+\n", + " f\" {output_rowcntstr} \")\n", + " \n", + " if node[\"children\"] is not None:\n", + " for c in node[\"children\"]:\n", + " print_plan(real_queryid, level+1,c,stageid)\n", + "\n", + " for c in queryplans:\n", + " outstr.append(\"\"+str(c['real_queryid'])+\"\")\n", + " if not show_plan_only:\n", + " outstr.append('''\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " ''')\n", + " else:\n", + " outstr.append('''\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " ''')\n", + "\n", + " print_plan(c['real_queryid'],0,c,0)\n", + " outstr.append(\"
stage idstage timepartionsoperatorinput rowsinput batchesoutput rowsoutput batchesoutput rows/batchtime metric nametime(%stage,%total,stdev)other metric namevalue(stdev)
stage idstage timepartionsoperatoroutput rows
\")\n", + " if plot:\n", + " display(HTML(\" \".join(outstr)))\n", + " return \" \".join(outstr)\n", + " \n", + " def get_metric_output_rowcnt(self, **kwargs):\n", + " return self.get_metric_rowcnt(\"number of output rows\",**kwargs)\n", + " \n", + " def get_metric_input_rowcnt(self, **kwargs):\n", + " return self.get_metric_rowcnt(\"number of input rows\",**kwargs)\n", + " \n", + " def get_metric_rowcnt(self,rowname, **kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + "\n", + " queryid=kwargs.get(\"queryid\",None)\n", + " stageid=kwargs.get(\"stageid\",None)\n", + " show_task=kwargs.get(\"show_task\",False)\n", + " \n", + " if queryid and type(queryid)==int:\n", + " queryid = [queryid,]\n", + " \n", + " if stageid and type(stageid)==int:\n", + " stageid = [stageid,]\n", + " \n", + " queryplans = self.queryplans.where(F.col(\"real_queryid\").isin(queryid)).orderBy(\"real_queryid\").collect() if queryid else self.queryplans.orderBy(\"real_queryid\").collect()\n", + " qps=[]\n", + "\n", + " rownames=rowname if type(rowname)==list else [rowname,]\n", + " def get_child(execid,node):\n", + " if node['metrics'] is not None:\n", + " outputrows=[x for x in node[\"metrics\"] if \"name\" in x and x[\"name\"] in rownames]\n", + " if len(outputrows)>0:\n", + " qps.append([node[\"nodeName\"],execid,outputrows[0]['accumulatorId']])\n", + " if node[\"children\"] is not None:\n", + " for c in node[\"children\"]:\n", + " get_child(execid,c)\n", + " for c in queryplans:\n", + " get_child(c['real_queryid'],c)\n", + "\n", + " if len(qps)==0:\n", + " print(\"Metric \",rowname,\" is not found. 
\")\n", + " return None\n", + " stagetime=self.df.where(\"Event='SparkListenerTaskEnd'\").groupBy(\"Stage ID\").agg(F.round(F.sum(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000/self.executor_instances/self.executor_cores*self.taskcpus,2).alias(\"stage time\"))\n", + " dfmetric=self.df.where(\"Event='SparkListenerTaskEnd'\").select(\"queryid\",\"real_queryid\",\"Stage ID\",\"Job ID\",F.explode(\"Accumulables\").alias(\"metric\")).select(\"*\",\"metric.*\").drop(\"metric\")\n", + " numrowmetric=spark.createDataFrame(qps)\n", + " numrowmetric=numrowmetric.withColumnRenamed(\"_1\",\"metric\").withColumnRenamed(\"_2\",\"real_queryid\").withColumnRenamed(\"_3\",\"metricid\")\n", + " dfmetric_rowcnt=dfmetric.join(numrowmetric.drop(\"real_queryid\"),on=[F.col(\"metricid\")==F.col(\"ID\")],how=\"right\")\n", + " if show_task:\n", + " stagemetric=dfmetric_rowcnt.join(stagetime,\"Stage ID\")\n", + " else:\n", + " stagemetric=dfmetric_rowcnt.groupBy(\"queryid\",\"real_queryid\",\"Job ID\",\"Stage ID\",\"metricid\").agg(F.round(F.sum(\"Update\")/1000000,2).alias(\"total_row\"),F.max(\"metric\").alias(\"nodename\")).join(stagetime,\"Stage ID\")\n", + "\n", + " if queryid:\n", + " if stageid:\n", + " return stagemetric.where(F.col(\"real_queryid\").isin(queryid) & F.col(\"Stage ID\").isin(stageid)).orderBy(\"Stage ID\")\n", + " else:\n", + " return stagemetric.where(F.col(\"real_queryid\").isin(queryid)).orderBy(\"Stage ID\")\n", + " else:\n", + " noderow=stagemetric.groupBy(\"real_queryid\",\"nodename\").agg(F.round(F.sum(\"total_row\"),2).alias(\"total_row\")).orderBy(\"nodename\").collect()\n", + " out={}\n", + " qids=set([r.real_queryid for r in noderow])\n", + " for r in noderow:\n", + " if r.nodename not in out:\n", + " out[r.nodename]={c:0 for c in qids}\n", + " out[r.nodename][r.real_queryid]=r.total_row\n", + " return pandas.DataFrame(out).T.sort_index(axis=0)\n", + " \n", + " def get_query_info(self,queryid):\n", + " display(HTML(\" time stat info \",))\n", + " 
tmp=self.get_query_time(queryid=queryid)\n", + " display(HTML(\" stage stat info \",))\n", + " display(self.get_stage_stat(queryid=queryid))\n", + " display(HTML(\" query plan \",))\n", + " self.get_query_plan(queryid=queryid)\n", + " display(HTML(\" stage hist info \",))\n", + " self.show_Stages_hist(queryid=queryid)\n", + " display(HTML(\" time info \",))\n", + " display(self.show_time_metric(queryid=queryid))\n", + " display(HTML(\" operator and rowcount \",))\n", + " display(self.get_metric_input_rowcnt(queryid=queryid))\n", + " display(self.get_metric_output_rowcnt(queryid=queryid))\n", + " \n", + " def get_app_info(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + "\n", + " display(HTML(f\" {self.appid} \",))\n", + " display(HTML(f\"http://{localhost}:18080/history/{self.appid}\"))\n", + " display(HTML(\" query time \",))\n", + " tmp=self.get_query_time(**kwargs)\n", + " display(HTML(\" operator count \",))\n", + " pdf=self.getOperatorCount()\n", + " display(pdf.style.apply(background_gradient,\n", + " cmap='OrRd',\n", + " m=pdf.min().min(),\n", + " M=pdf.max().max(),\n", + " low=0,\n", + " high=1))\n", + " \n", + " display(HTML(\" operator input row count \",))\n", + " pdf=self.get_metric_input_rowcnt(**kwargs)\n", + " if pdf is not None:\n", + " display(pdf.style.apply(background_gradient,\n", + " cmap='OrRd',\n", + " m=pdf.min().min(),\n", + " M=pdf.max().max(),\n", + " low=0,\n", + " high=1))\n", + " display(HTML(\" operator output row count \",))\n", + " pdf=self.get_metric_output_rowcnt(**kwargs)\n", + " if pdf is not None:\n", + " display(pdf.style.apply(background_gradient,\n", + " cmap='OrRd',\n", + " m=pdf.min().min(),\n", + " M=pdf.max().max(),\n", + " low=0,\n", + " high=1))\n", + " self.show_time_metric(**kwargs)\n", + " \n", + " def get_stage_stat(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + "\n", + " queryid=kwargs.get(\"queryid\",None)\n", + "\n", + " if queryid and type(queryid)==int:\n", 
+ " queryid = [queryid,]\n", + " \n", + " df=self.df.where(F.col(\"real_queryid\").isin(queryid)).where(F.col(\"Event\")=='SparkListenerTaskEnd')\n", + " \n", + " inputsize = df.select(\"real_queryid\",\"Stage ID\",\"Executor ID\", \"Task ID\", F.explode(\"Accumulables\")) \\\n", + " .select(\"real_queryid\",\"Stage ID\",\"Executor ID\", \"Task ID\",\"col.*\") \\\n", + " .where(\"Name='input size in bytes' or Name='size of files read'\") \\\n", + " .groupBy(\"Stage ID\") \\\n", + " .agg(F.round(F.sum(\"Update\")/1024/1024/1024,2).alias(\"input read\"))\n", + " \n", + " return df.groupBy(\"Job ID\",\"Stage ID\").agg(\n", + " F.round(F.sum(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000/self.executor_instances/self.executor_cores*self.taskcpus,1).alias(\"elapsed time\"),\n", + " F.round(F.sum(F.col(\"Disk Bytes Spilled\"))/1024/1024/1024,1).alias(\"disk spilled\"),\n", + " F.round(F.sum(F.col(\"Memory Bytes Spilled\"))/1024/1024/1024,1).alias(\"mem spilled\"),\n", + " F.round(F.sum(F.col(\"Local Bytes Read\"))/1024/1024/1024,1).alias(\"local read\"),\n", + " F.round(F.sum(F.col(\"Remote Bytes Read\"))/1024/1024/1024,1).alias(\"remote read\"),\n", + " F.round(F.sum(F.col(\"Shuffle Bytes Written\"))/1024/1024/1024,1).alias(\"shuffle write\"),\n", + " F.round(F.sum(F.col(\"Executor Deserialize Time\"))/1000,1).alias(\"deseri time\"),\n", + " F.round(F.sum(F.col(\"Fetch Wait Time\"))/1000,1).alias(\"fetch wait time\"),\n", + " F.round(F.sum(F.col(\"Shuffle Write Time\"))/1000000000,1).alias(\"shuffle write time\"),\n", + " F.round(F.sum(F.col(\"Result Serialization Time\"))/1000,1).alias(\"seri time\"),\n", + " F.round(F.sum(F.col(\"Getting Result Time\"))/1000,1).alias(\"get result time\"),\n", + " F.round(F.sum(F.col(\"JVM GC Time\"))/1000,1).alias(\"gc time\"),\n", + " F.round(F.sum(F.col(\"Executor CPU Time\"))/1000000000,1).alias(\"exe cpu time\") \n", + " ).join(inputsize,on=[\"Stage ID\"],how=\"left\").orderBy(\"Stage ID\").toPandas()\n", + " \n", + " def 
get_metrics_by_node(self,node_name):\n", + " if self.df is None:\n", + " self.load_data()\n", + " \n", + " if type(node_name)==str:\n", + " node_name=[node_name]\n", + " metrics=self.queryplans.collect()\n", + " coalesce=[]\n", + " metricsid=[0]\n", + " def get_metric(root):\n", + " if root['nodeName'] in node_name:\n", + " metricsid[0]=metricsid[0]+1\n", + " for l in root[\"metrics\"]:\n", + " coalesce.append([l['accumulatorId'],l[\"metricType\"],l['name'],root[\"nodeName\"],metricsid[0]])\n", + " if root[\"children\"] is not None:\n", + " for c in root[\"children\"]:\n", + " get_metric(c)\n", + " for c in metrics:\n", + " get_metric(c)\n", + "\n", + " df=self.df.select(\"queryid\",\"real_queryid\",'Stage ID','Task ID','Job ID',F.explode(\"Accumulables\"))\n", + " df=df.select(\"*\",\"col.*\")\n", + " metricdf=spark.createDataFrame(coalesce)\n", + " metricdf=metricdf.withColumnRenamed(\"_1\",\"ID\").withColumnRenamed(\"_2\",\"Unit\").withColumnRenamed(\"_3\",\"metricName\").withColumnRenamed(\"_4\",\"nodeName\").withColumnRenamed(\"_5\",\"nodeID\")\n", + " df=df.join(metricdf,on=[\"ID\"],how=\"right\")\n", + " shufflemetric=set(l[2] for l in coalesce)\n", + " metricdfs=[df.where(F.col(\"Name\")==l).groupBy(\"real_queryid\",\"nodeID\",\"Stage ID\").agg(F.stddev(\"Update\").alias(l+\"_stddev\"),F.mean(\"Update\").alias(l+\"_mean\"),F.mean(\"Update\").alias(l) if l.startswith(\"avg\") else F.sum(\"Update\").alias(l)) for l in shufflemetric]\n", + " \n", + " stagetimedf=self.df.where(\"Event='SparkListenerTaskEnd'\").groupBy(\"Stage ID\").agg(F.count(\"*\").alias(\"partnum\"),F.round(F.sum(F.col(\"Finish Time\")-F.col(\"Launch Time\"))/1000,2).alias(\"ElapsedTime\"))\n", + " \n", + " nodemetric=reduce(lambda x,y: x.join(y, on=['nodeID',\"Stage ID\",\"real_queryid\"],how=\"full\"),metricdfs)\n", + " return nodemetric.join(stagetimedf,on=\"Stage ID\")\n", + " \n", + " \n", + " def get_coalesce_batch_row_cnt(self,**kwargs):\n", + " 
stagesum=self.get_metrics_by_node(\"CoalesceBatches\")\n", + " \n", + " pandas.options.display.float_format = '{:,}'.format\n", + " \n", + " stagesum=stagesum.withColumnRenamed(\"number of output rows\",\"rows\")\n", + " \n", + " coalescedf = stagesum.orderBy(\"real_queryid\",'Stage ID').where(\"rows>4000\").toPandas()\n", + " \n", + " coalescedf[\"row/input_batch\"] = coalescedf[\"rows\"]/coalescedf[\"input_batches\"]\n", + " coalescedf[\"row/out_batch\"] = coalescedf[\"rows\"]/coalescedf[\"output_batches\"]\n", + " coalescedf['stage']=coalescedf[\"real_queryid\"].astype(str)+\"_\"+coalescedf['Stage ID'].astype(str)\n", + " \n", + " ax=coalescedf.plot(y=[\"row/input_batch\",\"row/out_batch\"],figsize=(30,8),style=\"-*\")\n", + " coalescedf.plot(ax=ax,y=['rows'],secondary_y=['rows'],style=\"k_\")\n", + " self.print_real_queryid(ax,coalescedf)\n", + " \n", + " return coalescedf\n", + " \n", + " def print_real_queryid(self,ax,dataset):\n", + " ax.axes.get_xaxis().set_ticks([])\n", + "\n", + " ymin, ymax = ax.get_ybound()\n", + "\n", + " real_queryid=list(dataset['real_queryid'])\n", + " s=real_queryid[0]\n", + " lastx=0\n", + " for idx,v in enumerate(real_queryid):\n", + " if v!=s:\n", + " xmin = xmax = idx-1+0.5\n", + " l = mlines.Line2D([xmin,xmax], [ymin,ymax],color=\"green\")\n", + " ax.add_line(l)\n", + " ax.text(lastx+(xmin-lastx)/2-0.25,ymin-(ymax-ymin)/20,f\"{s}\",size=20)\n", + " s=v\n", + " lastx=xmin\n", + "\n", + " def get_shuffle_stat(self,**kwargs):\n", + " if self.df is None:\n", + " self.load_data()\n", + " \n", + " shufflesize=kwargs.get(\"shuffle_size\",1000000)\n", + " queryid=kwargs.get(\"queryid\",None)\n", + " if queryid is not None:\n", + " if type(queryid) is str or type(queryid) is int:\n", + " queryid=[queryid,]\n", + "\n", + " exchangedf=self.get_metrics_by_node([\"ColumnarExchange\",\"ColumnarExchangeAdaptor\"])\n", + " exchangedf.cache()\n", + " if exchangedf.count() == 0:\n", + " return (None, None)\n", + "\n", + " 
mapdf=exchangedf.where(\"`time to split` is not null\").select(\"nodeID\",F.col(\"Stage ID\").alias(\"map_stageid\"),\"real_queryid\",F.floor(F.col(\"time to split\")/F.col(\"time to split_mean\")).alias(\"map_partnum\"),\"time to compress\",\"time to split\",\"shuffle write time\",\"time to spill\",'shuffle records written','data size','shuffle bytes written','shuffle bytes written_mean','shuffle bytes written_stddev','shuffle bytes spilled','number of input rows','number of input batches')\n", + " reducerdf=exchangedf.where(\"`time to split` is null\").select(\"nodeID\",F.col(\"Stage ID\").alias(\"reducer_stageid\"),\"real_queryid\",'local blocks read','local bytes read',F.floor(F.col(\"records read\")/F.col(\"records read_mean\")).alias(\"reducer_partnum\"),(F.col('avg read batch num rows')/10).alias(\"avg read batch num rows\"),'remote bytes read','records read','remote blocks read',(F.col(\"number of output rows\")/F.col(\"records read\")).alias(\"avg rows per split recordbatch\"))\n", + " shuffledf=mapdf.join(reducerdf,on=[\"nodeID\",\"real_queryid\"],how=\"full\")\n", + " if queryid is not None:\n", + " shuffledf=shuffledf.where(F.col(\"real_queryid\").isin(queryid))\n", + " shuffle_pdf=shuffledf.where(\"`shuffle bytes written`>1000000\").orderBy(\"real_queryid\",\"map_stageid\",\"nodeID\").toPandas()\n", + " if shuffle_pdf.shape[0] == 0:\n", + " return (shuffledf, None)\n", + "\n", + " shuffle_pdf[\"shuffle bytes written\"]=shuffle_pdf[\"shuffle bytes written\"]/1000000000\n", + " shuffle_pdf[\"data size\"]=shuffle_pdf[\"data size\"]/1000000000\n", + " shuffle_pdf[\"shuffle bytes written_mean\"]=shuffle_pdf[\"shuffle bytes written_mean\"]/1000000\n", + " shuffle_pdf[\"shuffle bytes written_stddev\"]=shuffle_pdf[\"shuffle bytes written_stddev\"]/1000000\n", + " ax=shuffle_pdf.plot(y=[\"avg read batch num rows\",'avg rows per split recordbatch'],figsize=(30,8),style=\"-*\",title=\"average batch size after split\")\n", + " 
self.print_real_queryid(ax,shuffle_pdf)\n", + " shuffle_pdf[\"split_ratio\"]=shuffle_pdf[\"records read\"]/shuffle_pdf['number of input batches']\n", + " ax=shuffle_pdf.plot(y=[\"split_ratio\",\"records read\"],secondary_y=[\"records read\"],figsize=(30,8),style=\"-*\",title=\"Split Ratio\")\n", + " self.print_real_queryid(ax,shuffle_pdf)\n", + " shuffle_pdf[\"compress_ratio\"]=shuffle_pdf[\"data size\"]/shuffle_pdf['shuffle bytes written']\n", + " ax=shuffle_pdf.plot(y=[\"shuffle bytes written\",\"compress_ratio\"],secondary_y=[\"compress_ratio\"],figsize=(30,8),style=\"-*\",title=\"compress ratio\")\n", + " self.print_real_queryid(ax,shuffle_pdf)\n", + " shufflewritepdf=shuffle_pdf\n", + " ax=shufflewritepdf.plot.bar(y=[\"shuffle write time\",\"time to spill\",\"time to compress\",\"time to split\"],stacked=True,figsize=(30,8),title=\"split time + shuffle write time vs. shuffle bytes written\")\n", + " ax=shufflewritepdf.plot(ax=ax,y=[\"shuffle bytes written\"],secondary_y=[\"shuffle bytes written\"],style=\"-*\")\n", + " self.print_real_queryid(ax,shufflewritepdf)\n", + " shuffle_pdf['avg input batch size']=shuffle_pdf[\"number of input rows\"]/shuffle_pdf[\"number of input batches\"]\n", + " ax=shuffle_pdf.plot(y=[\"avg input batch size\"],figsize=(30,8),style=\"b-*\",title=\"average input batch size\")\n", + " ax=shuffle_pdf.plot.bar(ax=ax,y=['number of input rows'],secondary_y=True)\n", + " self.print_real_queryid(ax,shuffle_pdf)\n", + " \n", + " metrics=self.queryplans.collect()\n", + " coalesce=[]\n", + " metricsid=[0]\n", + " def get_metric(root):\n", + " if root['nodeName'] in [\"ColumnarExchange\",\"ColumnarExchangeAdaptor\"]:\n", + " metricsid[0]=metricsid[0]+1\n", + " for l in root[\"metrics\"]:\n", + " coalesce.append([l['accumulatorId'],l[\"metricType\"],l['name'],root[\"nodeName\"],metricsid[0],root[\"simpleString\"]])\n", + " if root[\"children\"] is not None:\n", + " for c in root[\"children\"]:\n", + " get_metric(c)\n", + " for c in metrics:\n", 
+ " get_metric(c)\n", + "\n", + " tps={}\n", + " for r in coalesce:\n", + " rx=re.search(r\"\\[OUTPUT\\] List\\((.*)\\)\",r[5])\n", + " if rx:\n", + " if r[4] not in tps:\n", + " tps[r[4]]={}\n", + " fds=rx.group(1).split(\", \")\n", + " for f in fds:\n", + " if f.endswith(\"Type\"):\n", + " tp=re.search(r\":(.+Type)\",f).group(1)\n", + " if tp not in tps[r[4]]:\n", + " tps[r[4]][tp]=1\n", + " else:\n", + " tps[r[4]][tp]+=1\n", + " if len(tps)>0:\n", + " typedf=pandas.DataFrame(tps).T.reset_index()\n", + " typedf=typedf.fillna(0)\n", + " shuffle_pdf=pandas.merge(shuffle_pdf,typedf,left_on=\"nodeID\",right_on=\"index\")\n", + " shufflewritepdf=shuffle_pdf\n", + " ax=shufflewritepdf.plot.bar(y=[\"number of input rows\"],stacked=True,figsize=(30,8),title=\"rows vs. shuffle data type\")\n", + " ax=shufflewritepdf.plot(ax=ax,y=list(typedf.columns[1:]),secondary_y=list(typedf.columns[1:]),style=\"-o\")\n", + " self.print_real_queryid(ax,shufflewritepdf)\n", + " ax=shufflewritepdf.plot.bar(y=[\"time to split\"],stacked=True,figsize=(30,8),title=\"split time vs. 
shuffle data type\")\n", + " ax=shufflewritepdf.plot(ax=ax,y=list(typedf.columns[1:]),secondary_y=list(typedf.columns[1:]),style=\"-o\")\n", + " self.print_real_queryid(ax,shufflewritepdf)\n", + "\n", + " \n", + " \n", + " shufflewritepdf.plot(x=\"shuffle bytes written\",y=[\"shuffle write time\",\"time to split\"],figsize=(30,8),style=\"*\")\n", + " shufflewritepdf[\"avg shuffle batch size after split\"]=shufflewritepdf[\"shuffle bytes written\"]*1000000/shufflewritepdf['records read']\n", + " shufflewritepdf[\"avg raw batch size after split\"]=shufflewritepdf[\"data size\"]*1000000/shufflewritepdf['records read']\n", + " ax=shufflewritepdf.plot(y=[\"avg shuffle batch size after split\",\"avg raw batch size after split\",\"shuffle bytes written\"],secondary_y=[\"shuffle bytes written\"],figsize=(30,8),style=\"-*\",title=\"avg batch KB after split\")\n", + " self.print_real_queryid(ax,shufflewritepdf)\n", + " shufflewritepdf[\"avg batch# per splitted partition\"]=shufflewritepdf['records read']/(shufflewritepdf['local blocks read']+shufflewritepdf['remote blocks read'])\n", + " ax=shufflewritepdf.plot(y=[\"avg batch# per splitted partition\",'records read'],secondary_y=['records read'],figsize=(30,8),style=\"-*\",title=\"avg batch# per splitted partition\")\n", + " self.print_real_queryid(ax,shufflewritepdf)\n", + " fig, ax = plt.subplots(figsize=(30,8))\n", + " ax.set_title('shuffle wite bytes with stddev')\n", + " ax.errorbar(x=shuffle_pdf.index,y=shuffle_pdf['shuffle bytes written_mean'], yerr=shuffle_pdf['shuffle bytes written_stddev'], linestyle='None', marker='o')\n", + " self.print_real_queryid(ax,shuffle_pdf)\n", + " shuffle_pdf['record batch per mapper per reducer']=shuffle_pdf['records read']/(shuffle_pdf[\"map_partnum\"]*shuffle_pdf['reducer_partnum'])\n", + " ax=shuffle_pdf.plot(y=[\"record batch per mapper per reducer\"],figsize=(30,8),style=\"b-*\",title=\"record batch per mapper per reducer\")\n", + " self.print_real_queryid(ax,shuffle_pdf)\n", + " 
\n", + " inputsize = self.df.select(\"Stage ID\",\"Executor ID\", \"Task ID\", F.explode(\"Accumulables\")) \\\n", + " .select(\"Stage ID\",\"Executor ID\", \"Task ID\",\"col.*\") \\\n", + " .where(\"Name='input size in bytes' or Name='size of files read'\") \\\n", + " .groupBy(\"Task ID\") \\\n", + " .agg((F.sum(\"Update\")).alias(\"input read\"))\n", + " stageinput=self.df.where(\"event='SparkListenerTaskEnd'\" )\\\n", + " .join(inputsize,on=[\"Task ID\"],how=\"left\")\\\n", + " .fillna(0) \\\n", + " .select(F.col('Host'), F.col(\"real_queryid\"),F.col('Stage ID'),F.col('Task ID'),\n", + " F.round((F.col('Finish Time')/1000-F.col('Launch Time')/1000),2).alias('elapsedtime'),\n", + " F.round((F.col('`input read`')+F.col('`Bytes Read`')+F.col('`Local Bytes Read`')+F.col('`Remote Bytes Read`'))/1024/1024,2).alias('input'))\n", + " baisstage=stageinput.groupBy(\"real_queryid\",\"Stage ID\").agg(F.mean(\"elapsedtime\").alias(\"elapsed\"),F.mean(\"input\").alias(\"input\"),\n", + " (F.stddev(\"elapsedtime\")).alias(\"elapsedtime_err\"),\n", + " (F.stddev(\"input\")).alias(\"input_err\"),\n", + " (F.max(\"elapsedtime\")-F.mean(\"elapsedtime\")).alias(\"elapsed_max\"),\n", + " (F.mean(\"elapsedtime\")-F.min(\"elapsedtime\")).alias(\"elapsed_min\"),\n", + " (F.max(\"input\")-F.mean(\"input\")).alias(\"input_max\"),\n", + " (F.mean(\"input\")-F.min(\"input\")).alias(\"input_min\")).orderBy(\"real_queryid\",\"Stage ID\")\n", + " dfx=baisstage.toPandas()\n", + " fig, ax = plt.subplots(figsize=(30,8))\n", + " ax.set_title('input size')\n", + " ax.errorbar(x=dfx.index,y=dfx['input'], yerr=dfx['input_err'], fmt='ok', ecolor='red', lw=3)\n", + " ax.errorbar(x=dfx.index,y=dfx['input'],yerr=[dfx['input_min'],dfx['input_max']],\n", + " fmt='.k', ecolor='gray', lw=1)\n", + " self.print_real_queryid(ax,dfx)\n", + " \n", + " fig, ax = plt.subplots(figsize=(30,8))\n", + " ax.set_title('stage time')\n", + "\n", + " ax.errorbar(x=dfx.index,y=dfx['elapsed'], yerr=dfx['elapsedtime_err'], 
def get_stages_w_odd_partitions(appals,**kwargs):
    """Return stages whose task count is not a multiple of the task slots.

    The slot count is ``executor_cores * executor_instances / taskcpus`` read
    from *appals*.  Task-end events are grouped by stage and query; only the
    groups whose task count ("partitions") leaves a remainder when divided by
    the slot count are kept.

    Args:
        appals: analysis object exposing ``df``, ``load_data()``,
            ``executor_cores``, ``executor_instances`` and ``taskcpus``.
        **kwargs: accepted for call compatibility; not used here.

    Returns:
        pandas.DataFrame with ``Stage ID``, ``real_queryid``, ``elapsed time``
        (sum of ``Finish Time - Launch Time`` divided by 1000) and
        ``partitions``, ordered by ``elapsed time`` descending.
    """
    if appals.df is None:
        appals.load_data()
    task_slots = appals.executor_cores*appals.executor_instances/appals.taskcpus
    return appals.df.where("Event='SparkListenerTaskEnd'")\
                .groupBy("Stage ID","real_queryid")\
                .agg((F.sum(F.col('Finish Time')-F.col('Launch Time'))/1000).alias("elapsed time"),
                     F.count('*').alias('partitions'))\
                .where(F.col("partitions")%task_slots!=0)\
                .orderBy(F.desc("elapsed time")).toPandas()


def get_scaned_column_v1(appals, queryids=None):
    """List the columns read by every "Scan arrow" node of each query plan.

    Args:
        appals: analysis object whose ``queryplans`` supports
            ``.where(<sql condition>).collect()`` and yields plan trees with
            ``nodeName``, ``children`` and ``metadata['ReadSchema']``.
        queryids: iterable of query ids to inspect.  Defaults to 1..22, the
            range that used to be hard-coded.

    Returns:
        list of ``[qid, "col1,col2,..."]`` pairs, one per scan node.  Query
        ids that have no plan in the log are skipped instead of raising
        IndexError.
    """
    if queryids is None:
        queryids = range(1, 23)

    def collect_scans(node, found):
        # Depth-first walk of the plan tree, keeping the matching scan nodes.
        if node['nodeName'].startswith("Scan arrow"):
            found.append(node)
        for child in node['children']:
            collect_scans(child, found)

    alltable = []
    for qid in queryids:
        plans = appals.queryplans.where("real_queryid="+str(qid)).collect()
        if not plans:
            # The application did not run this query; nothing to report.
            continue
        scans = []
        collect_scans(plans[0], scans)
        for scan in scans:
            # ReadSchema looks like "struct<name:type,...>": keep the names.
            schema = re.split(r'[<>]', scan['metadata']['ReadSchema'])[1]
            columns = [field.split(":")[0] for field in schema.split(",")]
            alltable.append([qid, ",".join(columns)])
    return alltable


def get_scaned_column_v2(appals, queryids=None):
    """List the columns read by every "ColumnarBatchScan" node of each plan.

    Same contract as ``get_scaned_column_v1`` except that the column list is
    taken from the bracketed part of the node's ``simpleString`` and each
    entry is cut at ``#``.

    Args:
        appals: analysis object whose ``queryplans`` supports
            ``.where(<sql condition>).collect()``.
        queryids: iterable of query ids to inspect; defaults to 1..22.

    Returns:
        list of ``[qid, "col1,col2,..."]`` pairs, one per scan node; query ids
        without a plan are skipped.
    """
    if queryids is None:
        queryids = range(1, 23)

    def collect_scans(node, found):
        # Depth-first walk of the plan tree, keeping the matching scan nodes.
        if node['nodeName'].startswith("ColumnarBatchScan"):
            found.append(node)
        for child in node['children']:
            collect_scans(child, found)

    alltable = []
    for qid in queryids:
        plans = appals.queryplans.where("real_queryid="+str(qid)).collect()
        if not plans:
            continue
        scans = []
        collect_scans(plans[0], scans)
        for scan in scans:
            # simpleString carries "[col#id,col#id,...]": keep the names.
            bracketed = re.split(r"[\[\]]", scan['simpleString'])[1]
            columns = [attr.split("#")[0] for attr in bracketed.split(",")]
            alltable.append([qid, ",".join(columns)])
    return alltable
def compare_query(appals,queryid,appbaseals):
    """Print a stage-by-stage comparison of one query between two runs.

    Stages of both runs are paired by their position in ``get_stage_stat``.
    The stages of *appals* are then walked from the longest to the shortest
    until 90% of its total elapsed time is covered, and for each one the
    output-row metrics and query plans of both runs are displayed.

    Args:
        appals: analysis object of the run under study.
        queryid: query id to compare; it is passed to every helper call,
            including the critical-path breakdown.
        appbaseals: analysis object of the baseline run.

    Returns:
        None.  Everything is emitted through ``print`` and ``display``.
    """
    print(f"~~~~~~~~~~~~~~~~~~~~~~~~~~~~Query{queryid}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
    # Fixed: this used to be hard-coded to queryid=22.
    appals.show_critical_path_time_breakdown(queryid=queryid)
    s1=appals.get_stage_stat(queryid=queryid)
    s2=appbaseals.get_stage_stat(queryid=queryid)

    ls=s1[['Stage ID','elapsed time']].copy()
    ls.columns=['l sid','l time']
    rs=s2[['Stage ID','elapsed time']].copy()
    rs.columns=['r sid','r time']
    js=ls.join(rs)
    js['gap']=(js['r time'] - js['l time']).round(2)
    display(js)
    display(s1)
    display(s2)

    # Pair stages by position; zip stops at the shorter of the two runs.
    stagesmap=dict(zip(s1['Stage ID'],s2['Stage ID']))
    totaltime=sum(s1['elapsed time'])
    acctime=0
    s1time=s1.sort_values("elapsed time",ascending=False,ignore_index=True)
    ldfx=appals.get_metric_output_rowcnt(queryid=queryid)
    rdfx=appbaseals.get_metric_output_rowcnt(queryid=queryid)

    for stage_id,elapsed in zip(s1time['Stage ID'],s1time['elapsed time']):
        sid1=int(stage_id)
        counterpart=stagesmap.get(sid1)
        print(f"============================================================")
        display(ldfx[ldfx['Stage ID']==sid1])
        if counterpart is not None:
            sid2=int(counterpart)
            # Fixed: the baseline frame must be filtered with its own column,
            # not with a mask computed from ldfx.
            display(rdfx[rdfx['Stage ID']==sid2])
        print(f" Gazelle Query {queryid} Stage {sid1}")
        appals.get_query_plan(stageid=sid1,show_simple_string=True)
        if counterpart is not None:
            print(f" Photon Query {queryid} Stage {sid2}")
            appbaseals.get_query_plan(stageid=sid2,show_simple_string=True)
        else:
            # The baseline has fewer stages; there is nothing to pair with.
            print(f" no baseline stage paired with Stage {sid1}")
        acctime+=elapsed
        # Stop once the displayed stages cover 90% of the run's time.
        if totaltime and acctime/totaltime>=0.9:
            break
# Configuration keys that comp_spark_conf never reports as differences.
# A missing comma used to fuse two adjacent 'spark.driver.appUIAddress'
# literals into one bogus key; the repeated entries are collapsed here.
notlist=['resource.executor.cores',
 'spark.app.id',
 'spark.app.initial.file.urls',
 'spark.app.name',
 'spark.app.startTime',
 'spark.driver.port',
 'spark.job.description',
 'spark.jobGroup.id',
 'spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_HOSTS',
 'spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES',
 'spark.rdd.scope',
 'spark.sql.execution.id',
 '__fetch_continuous_blocks_in_batch_enabled',
 'spark.driver.appUIAddress',
 'spark.driver.host',
 'spark.driver.extraClassPath',
 'spark.eventLog.dir',
 'spark.executorEnv.CC',
 'spark.executorEnv.LD_LIBRARY_PATH',
 'spark.executorEnv.LD_PRELOAD',
 'spark.executorEnv.LIBARROW_DIR',
 'spark.files',
 'spark.history.fs.logDirectory',
 'spark.sql.warehouse.dir',
 'spark.yarn.appMasterEnv.LD_PRELOAD',
 'spark.yarn.dist.files',
]


def comp_spark_conf(app0,app1):
    """Return the Spark settings whose values differ between two runs.

    Both configurations are joined on their index (the setting name), the
    values are lower-cased and compared, and rows that are equal or whose
    name is listed in ``notlist`` are dropped.

    Args:
        app0: analysis object exposing ``appid`` and ``get_spark_config()``.
            The returned frame is expected to hold the values in a column
            labelled ``0`` -- the joined column names are built as
            ``"0" + appid[-8:]``.
        app1: second analysis object with the same interface.

    Returns:
        pandas.DataFrame with one value column per run plus a boolean
        ``comp`` column (always False in the result).

    NOTE(review): ``DataFrame.join`` defaults to a left join, so settings
    present only in *app1* are not reported -- confirm this is intended.
    """
    left_suffix=app0.appid[-8:]
    right_suffix=app1.appid[-8:]
    pdfc=app0.get_spark_config().join(app1.get_spark_config(),lsuffix=left_suffix,rsuffix=right_suffix)

    left_col="0"+left_suffix
    right_col="0"+right_suffix
    # Compare case-insensitively so that e.g. "TRUE" and "true" are equal.
    pdfc[left_col]=pdfc[left_col].str.lower()
    pdfc[right_col]=pdfc[right_col].str.lower()

    pdfc['comp']=(pdfc[left_col]==pdfc[right_col])
    return pdfc.loc[~pdfc['comp'] & ~pdfc.index.isin(notlist)]
@pandas_udf("host string, id string,taskid int, time double", PandasUDFType.GROUPED_MAP)
def collect_udf_time(pdf):
    """Scrape "start UDF"/"stop UDF" lines from executor container logs.

    For every row of *pdf* the stderr log of the matching container is
    downloaded from port 8042 of the node and the lines starting with
    ``start UDF`` or ``stop UDF`` are parsed.

    Args:
        pdf: pandas.DataFrame with the columns ``Host``, ``Executor ID`` and
            ``appid``.

    Returns:
        pandas.DataFrame with the columns ``id`` (first word of the log
        line), ``taskid`` (5th space-separated token, as int), ``time``
        (first decimal number in the 7th token) and ``host``.  It is empty
        when no container produced UDF lines.
    """
    # An empty ProxyHandler bypasses any proxy configured in the environment.
    proxy_handler = request.ProxyHandler({})
    opener = request.build_opener(proxy_handler)
    time_pattern = re.compile(r'\d+\.\d+')

    rst=[]
    for idx,l in pdf.iterrows():
        # NOTE(review): the node address is derived from a hard-coded prefix
        # plus the last character of the host name -- cluster specific.
        ip="10.1.2.19"+l['Host'][-1:]
        execid="{:06d}".format(int(l['Executor ID'])+1)
        appid=l['appid']
        url = f'http://{ip}:8042/node/containerlogs/container_{appid}_01_{execid}/sparkuser/stderr/?start=0'
        # Close the HTTP response as soon as the log has been read.
        with opener.open(url) as req:
            data = req.read().decode('utf8')
        cnt_udf=[line.split(" ") for line in data.split("\n") if line.startswith(('start UDF','stop UDF'))]
        if not cnt_udf:
            # This container logged no UDF activity; selecting columns from
            # an empty frame would raise.
            continue
        unf_pdf=pandas.DataFrame(cnt_udf)
        srst=unf_pdf.loc[:,[0,4,6]].copy()
        srst.columns=['id','taskid','time']
        srst['host']=l['Host']
        srst['taskid']=srst['taskid'].astype(int)
        srst['time']=srst['time'].apply(lambda f: float(time_pattern.search(f).group(0)))
        rst.append(srst)
    if not rst:
        # pandas.concat([]) raises; return a typed empty frame instead.
        return pandas.DataFrame({'host':pandas.Series(dtype='object'),
                                 'id':pandas.Series(dtype='object'),
                                 'taskid':pandas.Series(dtype='int32'),
                                 'time':pandas.Series(dtype='float64')})
    return pandas.concat(rst)
class App_Log_Analysis_Node_log(App_Log_Analysis):
    """App-log analysis that adds UDF intervals scraped from node logs.

    The notebook defined this class twice with identical bodies in two
    consecutive cells; this is the single consolidated definition.
    """

    def __init__(self, appid,jobids):
        App_Log_Analysis.__init__(self, appid,jobids)

    def generate_trace_view_list(self,id=0, **kwargs):
        """Build trace-viewer events for all tasks plus their UDF intervals.

        Task start/end events are replayed to assign each task a display
        slot per executor.  ``collect_udf_time`` is then applied per host and
        one ``udf`` event is added for every start/stop pair found.

        Args:
            id: unused here; kept for interface compatibility.
            **kwargs: unused here; kept for interface compatibility.

        Returns:
            list of JSON strings, one per trace event.  As a side effect
            ``self.starttime`` (launch time of the first finished task) and
            ``self.tskmap`` (task id -> pid/tid) are set.
        """
        if self.df is None:
            self.load_data()

        appid=self.appid
        events=self.df.toPandas()
        # pid -> {slot: [task id, launch time]}; [-1,-1] marks a free slot.
        coretrack={}
        trace_events=[]
        starttime=0
        # Tasks whose end event arrived without a tracked start.
        ended_event=set()

        # Hosts get pid bases 100000, 200000, ... in host-name order.
        exec_hosts={}
        hostsdf=self.df.select("Host").distinct().orderBy("Host")
        hostid=100000
        for i,l in hostsdf.toPandas().iterrows():
            exec_hosts[l['Host']]=hostid
            hostid=hostid+100000

        tskmap={}
        for idx,l in events.iterrows():
            if l['Event']=='SparkListenerTaskStart':
                hostid=exec_hosts[l['Host']]

                tsk=l['Task ID']
                pid=int(l['Executor ID'])*100+hostid
                stime=l['Launch Time']
                #the task's starttime and finishtime is the same, ignore it.
                if tsk in ended_event:
                    continue
                if pid not in coretrack:
                    tids={}
                    trace_events.append({
                       "name": "process_name",
                       "ph": "M",
                       "pid":pid,
                       "tid":0,
                       "args":{"name":"{:s}.{:s}".format(l['Host'],l['Executor ID'])}
                      })
                else:
                    tids=coretrack[pid]
                # Reuse the first free slot, or open a new one when all are busy.
                for t in tids.keys():
                    if tids[t][0]==-1:
                        tids[t]=[tsk,stime]
                        break
                else:
                    t=len(tids)
                    tids[t]=[tsk,stime]
                coretrack[pid]=tids

            if l['Event']=='SparkListenerTaskEnd':
                hostid=exec_hosts[l['Host']]
                pid=int(l['Executor ID'])*100+hostid
                tsk=l['Task ID']
                fintime=l['Finish Time']

                # An executor without any tracked start has no slots yet.
                tids=coretrack.get(pid,{})
                for t in tids.keys():
                    if tids[t][0]==tsk:
                        tids[t]=[-1,-1]
                        break
                else:
                    # End seen before (or without) its start: remember it so
                    # that a later start event for the task is ignored.
                    ended_event.add(tsk)
                    continue
                # A task launched at most 2 time units before this one
                # finished takes over the freed slot, and this task's bar is
                # cut at that launch time so the two do not overlap.
                for ps in reversed(list(tids.keys())):
                    if tids[ps][1]-fintime<0 and tids[ps][1]-fintime>=-2:
                        fintime=tids[ps][1]
                        tids[t]=tids[ps]
                        tids[ps]=[-1,-1]
                        break
                if starttime==0:
                    starttime=l['Launch Time']

                sstime=l['Launch Time']-starttime

                trace_events.append({
                       'tid':pid+int(t),
                       'ts':sstime,
                       'dur':fintime-l['Launch Time'],
                       'pid':pid,
                       "ph":'X',
                       'name':"stg{:d}".format(l['Stage ID']),
                       # NOTE(review): the sibling class
                       # App_Log_Analysis_Node_Log_Uni reads 'Job ID' here;
                       # confirm which column name the frame really has.
                       'args':{"job id": l['job id'],
                               "stage id": l['Stage ID'],
                               "tskid":tsk,
                               "input":builtins.round(l["Bytes Read"]/1024/1024,2),
                               "spill":builtins.round(l["Memory Bytes Spilled"]/1024/1024,2),
                               "Shuffle Read Metrics": "",
                               "|---Local Read": builtins.round(l["Local Bytes Read"]/1024/1024,2),
                               "|---Remote Read":builtins.round(l["Remote Bytes Read"]/1024/1024,2),
                               "Shuffle Write Metrics": "",
                               "|---Write":builtins.round(l['Shuffle Bytes Written']/1024/1024,2)
                               }
                      })
                tskmap[tsk]={'pid':pid,'tid':pid+int(t)}

        self.starttime=starttime
        self.tskmap=tskmap

        hostdf=self.df.select('Host','Executor ID',F.lit(appid[len('application_'):]).alias('appid')).distinct().orderBy('Host')
        rst=hostdf.groupBy('Host').apply(collect_udf_time)
        rst.cache()
        start_df=rst.where("id='start'").select(F.col('taskid').alias('start_taskid'),F.col('time').alias("starttime"))
        stop_df=rst.where("id='stop'").select('taskid',F.col('time').alias("stop_time"))
        # Pair each start with the earliest stop that is not before it.  An
        # inner join is used because a start without a stop would otherwise
        # produce a null taskid/stop_time row that cannot be drawn.
        df=start_df.join(stop_df, on=[start_df.start_taskid==stop_df.taskid,stop_df['stop_time']>=start_df['starttime']],how='inner').groupBy('taskid','starttime').agg(F.min('stop_time').alias('stop_time'))
        pdf=df.toPandas()
        for idx,l in pdf.iterrows():
            slot=self.tskmap.get(l['taskid'])
            if slot is None:
                # UDF lines of a task that never produced a trace bar.
                continue
            trace_events.append({
                       'tid':slot['tid'],
                       'ts':l['starttime']*1000-self.starttime,
                       'dur':(l['stop_time']-l['starttime'])*1000,
                       'pid':slot['pid'],
                       'ph':'X',
                       'name':'udf'})

        return [json.dumps(l) for l in trace_events]
class App_Log_Analysis_Node_Log_Uni(App_Log_Analysis):
    """App-log analysis that adds "xgbtck" log records to the task timeline."""

    def __init__(self, file,jobids):
        App_Log_Analysis.__init__(self, file,jobids)

    def _load_thread_log(self, shownodes):
        """Load the xgbtck records from the application and node log files.

        The application log is ``<self.file without extension>.stdout``; each
        node in *shownodes* contributes ``<dir of self.file>/<node>/xgbtck.txt``.
        Files that do not exist are ignored.

        Returns:
            A cached Spark DataFrame whose first six columns are named
            ``xgbtck, event, ts, elapsed, threadid, taskid``, filtered to the
            ``load_library``/``data_load``/``data_convert`` events, or None
            when no usable log exists.
        """
        applog=os.path.splitext(self.file)[0]+".stdout"
        basedir=os.path.split(self.file)[0]
        candidates=[applog]
        candidates.extend(basedir+"/"+node+"/xgbtck.txt" for node in shownodes)

        logdfs=[sc.textFile(path,84).mapPartitions(splits).toDF() for path in candidates if fs.exists(path)]
        if not logdfs:
            return None
        # DataFrame has no concat(); union appends the rows of each frame.
        logdf=reduce(lambda l,r: l.union(r),logdfs)
        logdf=logdf.cache()
        logdf.count()

        firstrow=logdf.limit(1).collect()
        if not firstrow:
            return None

        # Drop leading columns until the one holding the "xgbtck" marker.
        for c in logdf.columns:
            if firstrow[0][c]!="xgbtck":
                logdf=logdf.drop(c)
            else:
                break

        usefulc=["xgbtck","event","ts","elapsed","threadid","taskid"]
        if len(logdf.columns)<len(usefulc):
            # The marker was not found, so the fields cannot be located.
            return None
        for i in range(0,len(usefulc)):
            logdf=logdf.withColumnRenamed(logdf.columns[i],usefulc[i])

        return logdf.where(F.col("event").isin(['load_library','data_load','data_convert']))

    def generate_trace_view_list(self,id=0, **kwargs):
        """Build trace-viewer events for all tasks plus their log records.

        Args:
            id: unused here; kept for interface compatibility.
            **kwargs: ``shownodes`` -- optional list of node directory names
                whose ``xgbtck.txt`` should be read; None means none.

        Returns:
            list of JSON strings: the task events followed by one event per
            log record that could be matched to a task.  When no log is
            available only the task events are returned.  ``self.starttime``
            and ``self.tskmap`` are set as a side effect.
        """
        if self.df is None:
            self.load_data()

        shownodes=kwargs.get("shownodes",None) or []

        showdf=self.df

        events=showdf.drop("Accumulables","Stage IDs").orderBy("Launch Time","Finish Time").toPandas()
        # pid -> {slot: [task id, launch time]}; [-1,-1] marks a free slot.
        coretrack={}
        trace_events=[]
        starttime=0
        exec_hosts={}
        hostsdf=showdf.select("Host").distinct().orderBy("Host")
        hostid=100000
        # Tasks whose end event arrived without a tracked start.
        ended_event=set()

        logdf=self._load_thread_log(shownodes)
        if logdf is None:
            task_thread={}
        else:
            # task id -> {"threadid": ...} taken from the data_convert records.
            task_thread=logdf.where("event='data_convert'").select(F.col("taskid").astype(IntegerType()),F.col("threadid").astype(IntegerType())).distinct().toPandas().set_index('taskid').to_dict('index')

        # Hosts get pid bases 100000, 200000, ... in host-name order.
        for i,l in hostsdf.toPandas().iterrows():
            exec_hosts[l['Host']]=hostid
            hostid=hostid+100000

        tskmap={}
        for idx,l in events.iterrows():
            if l['Event']=='SparkListenerTaskStart':
                hostid=exec_hosts[l['Host']]

                tsk=l['Task ID']
                pid=int(l['Executor ID'])*100+hostid
                stime=l['Launch Time']
                #the task's starttime and finishtime is the same, ignore it.
                if tsk in ended_event:
                    continue
                if pid not in coretrack:
                    tids={}
                    trace_events.append({
                       "name": "process_name",
                       "ph": "M",
                       "pid":pid,
                       "tid":0,
                       "args":{"name":"{:s}.{:s}".format(l['Host'],l['Executor ID'])}
                      })
                else:
                    tids=coretrack[pid]

                tidarr=[tsk,stime]

                # Reuse the first free slot, or open a new one when all are busy.
                for t in tids.keys():
                    if tids[t][0]==-1:
                        tids[t]=tidarr
                        break
                else:
                    t=len(tids)
                    tids[t]=tidarr
                coretrack[pid]=tids

            if l['Event']=='SparkListenerTaskEnd':
                hostid=exec_hosts[l['Host']]
                pid=int(l['Executor ID'])*100+hostid
                tsk=l['Task ID']
                fintime=l['Finish Time']

                # An executor without any tracked start has no slots yet.
                tids=coretrack.get(pid,{})
                for t in tids.keys():
                    if tids[t][0]==tsk:
                        tids[t]=[-1,-1]
                        break
                else:
                    ended_event.add(tsk)
                    continue
                # Move into the freed slot a task that was launched at most 2
                # time units before this finish, or that the log shows running
                # on the same thread id; this task's bar is cut at that launch.
                for ps in reversed(list(tids.keys())):
                    if (tids[ps][1]-fintime<0 and tids[ps][1]-fintime>=-2) or \
                        (tsk in task_thread and tids[ps][0] in task_thread and task_thread[tsk]["threadid"]==task_thread[tids[ps][0]]["threadid"]):
                        fintime=tids[ps][1]
                        tids[t]=tids[ps]
                        tids[ps]=[-1,-1]
                        break
                if starttime==0:
                    starttime=l['Launch Time']

                sstime=l['Launch Time']-starttime

                trace_events.append({
                       'tid':pid+int(t),
                       'ts':sstime,
                       'dur':fintime-l['Launch Time'],
                       'pid':pid,
                       "ph":'X',
                       'name':"stg{:d}".format(l['Stage ID']),
                       'args':{"job id": l['Job ID'],
                               "stage id": l['Stage ID'],
                               "tskid":tsk,
                               "input":builtins.round(l["Bytes Read"]/1024/1024,2),
                               "spill":builtins.round(l["Memory Bytes Spilled"]/1024/1024,2),
                               "Shuffle Read Metrics": "",
                               "|---Local Read": builtins.round(l["Local Bytes Read"]/1024/1024,2),
                               "|---Remote Read":builtins.round(l["Remote Bytes Read"]/1024/1024,2),
                               "Shuffle Write Metrics": "",
                               "|---Write":builtins.round(l['Shuffle Bytes Written']/1024/1024,2)
                               }
                      })
                tskmap[tsk]={'pid':pid,'tid':pid+int(t)}

        self.starttime=starttime
        self.tskmap=tskmap

        output=[json.dumps(l) for l in trace_events]
        if logdf is None or not tskmap:
            # Nothing to overlay: no log records or no finished task.
            return output

        # Attach pid/tid to every log record through its task id.
        tskmapdf = spark.createDataFrame(pandas.DataFrame(self.tskmap).T.reset_index())
        logdf=logdf.withColumn("ts",F.col("ts").astype(LongType()))
        logdf=logdf.withColumn("taskid",F.col("taskid").astype(LongType()))
        logdf=logdf.withColumnRenamed("event",'type')
        mgd=logdf.join(tskmapdf,on=(F.col('taskid')==F.col("index")),how="right")
        rstdf=mgd.select(F.col('tid').alias("tid"),
                         (F.round(F.col('ts')-F.lit(self.starttime),3)).alias("ts"),
                         F.round(F.col("elapsed"),3).alias("dur"),
                         F.lit(F.col('pid')).alias("pid"),
                         F.lit("X").alias("ph"),
                         F.col("type").alias("name")
                        ).where(F.col("ts").isNotNull()).orderBy('ts')

        output.extend(rstdf.toJSON().collect())

        return output
def split_trace(x):
    """Parse perf trace lines into 5-field tuples (for ``mapPartitions``).

    Recognised lines and the tuple produced for each:

    * sched_switch, in either layout: (time, 'sched_switch', source pid,
      state, destination pid)
    * recvfrom/sendto with a duration: (time, syscall, pid, ms, size)
    * recvfrom/sendto without a duration: (time, syscall, pid, 0, size)
    * "[continued]" lines and poll with a duration: (time, syscall, pid, ms, 0)
    * poll without a duration: (time, 'poll', pid, 0, 0)

    Any other line that does not start with a space is kept as
    ``(0, line, '', '', '')``; unrecognised lines starting with a space are
    dropped.

    Args:
        x: iterable of text lines.

    Returns:
        iterator over the tuples, in input order.
    """
    # Compiled once per call instead of once per line.
    sched_short=re.compile(r"^(\d+\.\d+).*sched:(sched_switch):.+:(\d+) \[\d+\] (\S+) ==> .+:(\d+) ")
    # Fixed: the timestamp group used to be written "(\d+\.\d+)*sched:", which
    # only matches a number glued to "sched:" and so left the time empty.
    sched_long=re.compile(r"(\d+\.\d+).*sched:(sched_switch):.*prev_pid=(\d+).*prev_state=(\S+) ==> .*next_pid=(\d+)")
    io_timed=re.compile(r"(\d+\.\d+) \( +(\d+\.\d+) +ms\):[^/]+/(\d+) (recvfrom|sendto)\(fd: \d+<\S+:\[\d+\]>, \S+: 0x[a-f0-9]+, \S+: (\d+)")
    io_untimed=re.compile(r"(\d+\.\d+) \( +\): [^/]+/(\d+) (recvfrom|sendto)\(fd: \d+<\S+:\[\d+\]>, \S+: 0x[a-f0-9]+, \S+: (\d+)")
    continued=re.compile(r"(\d+\.\d+) \( *(\d+\.\d+) ms\): [^/]+/(\d+) ... \[continued\]: (sendto|recvfrom|poll)")
    poll_timed=re.compile(r"(\d+\.\d+) \( +(\d+\.\d+) +ms\): [^/]+/(\d+) (poll)")
    poll_untimed=re.compile(r"(\d+\.\d+) \( +\): [^/]+/(\d+) (poll)")

    fi=[]
    for line in x:
        m=sched_short.search(line) or sched_long.search(line)
        if m:
            fi.append((m.group(1),m.group(2),m.group(3),m.group(4),m.group(5))) #time, switch, src, status, dst
            continue
        m=io_timed.search(line)
        if m:
            fi.append((m.group(1),m.group(4),m.group(3),m.group(2),m.group(5))) #time, snd/rcv, pid, ms, size
            continue
        m=io_untimed.search(line)
        if m:
            fi.append((m.group(1),m.group(3),m.group(2),0,m.group(4))) #time, snd/rcv, pid, 0, size
            continue
        m=continued.search(line) or poll_timed.search(line)
        if m:
            fi.append((m.group(1),m.group(4),m.group(3),m.group(2),0)) #time, syscall, pid, ms, 0
            continue
        m=poll_untimed.search(line)
        if m:
            fi.append((m.group(1),m.group(3),m.group(2),0,0)) #time, poll, pid, 0, 0
            continue
        if not line.startswith(" "):
            # Keep unrecognised header-like lines so the caller can show them.
            fi.append((0,line,'','',''))
    return iter(fi)
class Perf_trace_analysis(Analysis):
    """Turns a perf trace text file into scheduler and socket trace events."""

    def __init__(self,sar_file):
        Analysis.__init__(self,sar_file)
        self.starttime=None

    def load_data(self):
        """Parse ``self.file`` with ``split_trace`` and set the time origin.

        Up to five unparsed lines (first field 0) are displayed, then only
        rows with a positive timestamp are kept.

        If ``self.starttime`` is None it becomes the smallest timestamp in
        the data.  Otherwise ``uptime.txt`` next to the trace is read and its
        value * 1000 is subtracted from ``self.starttime``; when that file is
        missing a message is printed and None is returned without setting
        ``self.df``.

        Returns:
            The Spark DataFrame (also stored in ``self.df``), or None.
        """
        sardata=sc.textFile(self.file)
        sardf=sardata.mapPartitions(split_trace).toDF()
        display(sardf.where("_1=0").limit(5).collect())
        sardf=sardf.withColumn("_1",F.col("_1").astype(DoubleType()))
        sardf=sardf.where("_1>0")
        starttime=sardf.agg(F.min("_1")).collect()[0][0]
        if self.starttime is None:
            self.starttime=(float(starttime))
        else:
            paths=os.path.split(self.file)
            if fs.exists(paths[0]+"/uptime.txt"):
                with fs.open(paths[0]+"/uptime.txt") as f:
                    strf=f.read().decode('ascii')
                print("input starttime:",self.starttime,"uptime:",float(strf)*1000,"record starttime:",starttime)
                self.starttime=self.starttime-float(strf)*1000
            else:
                print("uptime.txt isn't found, wrong")
                return

        self.df=sardf
        return sardf

    def generate_sched_view_list(self,id=0,**kwargs):
        """Build one event per scheduling slice of the busiest threads.

        Args:
            id: value written to the ``pid`` field of every event.
            **kwargs:
                sched_time_offset: added to ``self.starttime`` (default 0).
                cpu_threshold: minimum share of the observed span a thread
                    must have been running to be drawn (default 0.1).
                sched_cnt: a thread is considered only if it was switched in
                    more than this many times (default 10).
                pidstat_tids: list of thread ids, or pidstat-like text whose
                    4th whitespace-separated field is the thread id.
                pidstat_tids_txt: file next to the trace that lists thread
                    ids, used when ``pidstat_tids`` is not given (default
                    "sched_threads.txt").

        Returns:
            list of JSON strings; empty when no thread list is available or
            no matching switch event exists.
        """
        sardf=self.df
        starttime=self.starttime
        starttime=starttime+kwargs.get("sched_time_offset",0)
        print("offset time",starttime)

        swdf=sardf.where("_2='sched_switch'")

        cputhreshold=kwargs.get("cpu_threshold",0.1)
        sched_cnt = kwargs.get("sched_cnt",10)

        pidstat_tids=kwargs.get("pidstat_tids",None)
        pidstat_tids_txt=kwargs.get("pidstat_tids_txt","sched_threads.txt")

        if pidstat_tids:
            if isinstance(pidstat_tids,list):
                tids=pidstat_tids
            else:
                rows=[re.split(r'\s+',t) for t in pidstat_tids.split("\n")]
                tids=[t[3] for t in rows if len(t)>4]
        else:
            paths=os.path.split(self.file)
            if fs.exists(paths[0]+"/"+pidstat_tids_txt):
                with fs.open(paths[0]+"/"+pidstat_tids_txt) as f:
                    tids=[l.strip() for l in f.read().decode('ascii').split("\n") if len(l)>0]
            else:
                print("Wrong, no pidstat_tids args and no sched_threads.txt file")
                return []
        # Keep the threads that were switched in more than sched_cnt times.
        tidcnt=swdf.where(F.col("_5").isin(tids)).groupBy("_5").count()
        tidm10=tidcnt.where("count>{:d}".format(sched_cnt)).select("_5").collect()
        rtids=[t[0] for t in tidm10]
        rtiddf=swdf.where(F.col("_5").isin(rtids) | F.col("_3").isin(rtids))
        rtiddf=rtiddf.withColumn("_1",F.col("_1").astype(DoubleType())-starttime)
        rtiddf=rtiddf.withColumn("_3",F.col("_3").astype(IntegerType()))
        rtiddf=rtiddf.withColumn("_5",F.col("_5").astype(IntegerType()))
        rtiddf=rtiddf.withColumn("_1",F.round(F.col("_1"),3))
        rtidcol=rtiddf.collect()
        if not rtidcol:
            # No switch event for the selected threads; rtidcol[0] would raise.
            return []

        tidmap={}     # thread id -> time it was last switched in or out
        tidtotal={}   # thread id -> accumulated running time
        for t in rtids:
            tidmap[int(t)]=0
            tidtotal[int(t)]=0
        trace_events=[]
        mintime=rtidcol[0]["_1"]
        maxtime=0
        # First pass: total running time per thread and the observed span.
        for r in rtidcol:
            if r["_3"] in tidtotal:
                tidtotal[r["_3"]]=tidtotal[r["_3"]]+r["_1"]-tidmap[r["_3"]]
                tidmap[r["_3"]]=r["_1"]
                maxtime=r["_1"]
            if r["_5"] in tidmap:
                tidmap[r["_5"]]=r["_1"]
        span=maxtime-mintime
        # Second pass: emit a slice each time a busy-enough thread is
        # switched out.  A zero span cannot yield a share, so nothing is drawn.
        for r in rtidcol:
            if r["_3"] in tidmap and span!=0 and tidtotal[r["_3"]]/span>cputhreshold:
                trace_events.append({
                    'tid':r["_3"],
                    'ts':tidmap[r["_3"]],
                    'pid':id,
                    'ph':'X',
                    # builtins.round, as elsewhere in this file, so that a
                    # star-imported Spark round() cannot be picked up.
                    'dur':builtins.round(r["_1"]-tidmap[r["_3"]],3),
                    'name':r["_4"]
                })

                tidmap[r["_3"]]=r["_1"]
            if r["_5"] in tidmap:
                tidmap[r["_5"]]=r["_1"]
        return [json.dumps(l) for l in trace_events]

    def generate_nic_view_list(self,id=0,**kwargs):
        """Build one event per non-sched_switch (socket/poll) record.

        Args:
            id: currently not used -- every event is emitted with pid 0.
                NOTE(review): the scheduler view writes ``id`` into ``pid``;
                confirm whether this view should do the same.
            **kwargs: ``sched_time_offset`` is added to ``self.starttime``.

        Returns:
            list of JSON strings with tid, ts, pid, ph, dur, name and
            ``args.size``.
        """
        sardf=self.df
        starttime=self.starttime
        starttime=starttime+kwargs.get("sched_time_offset",0)
        print("offset time",starttime)

        nicdf=sardf.where("_2<>'sched_switch'")
        # Rows named 'continued' carry the duration of the row sharing their
        # time and thread; that duration replaces the row's own.
        cntdf=nicdf.where("_2='continued'")
        cntdf=cntdf.select("_1","_3","_4").withColumnRenamed("_4","cnt_4")
        nicdf=nicdf.join(cntdf,on=["_1","_3"],how="leftouter")
        nicdf=nicdf.where("_2<>'continued'")
        nicdf=nicdf.select(F.col("_1"),F.col("_2"),F.col("_3"),F.when(F.col("cnt_4").isNull(), F.col("_4")).otherwise(F.col("cnt_4")).alias("_4"),F.col("_5"))
        nicdf=nicdf.withColumn("_1",F.col("_1").astype(DoubleType())-starttime)
        nicdf=nicdf.withColumn("_3",F.col("_3").astype(IntegerType()))
        nicdf=nicdf.withColumn("_5",F.col("_5").astype(IntegerType()))
        nicdf=nicdf.withColumn("_1",F.col("_1").astype(IntegerType()))
        nicdf=nicdf.withColumn("_4",F.col("_4").astype(DoubleType()))
        nicdf=nicdf.withColumn("_4",F.col("_4").astype(LongType()))
        return nicdf.select(
            F.col("_3").alias('tid'),
            (F.col("_1")).alias('ts'),
            F.lit(0).alias('pid'),
            F.lit('X').alias('ph'),
            F.col("_4").alias('dur'),
            F.col("_2").alias('name'),
            F.struct(
                F.col("_5").alias("size")
            ).alias('args')
        ).toJSON().collect()

    def generate_trace_view_list(self,id=0,**kwargs):
        """Return scheduler events, then socket events, then the base events.

        Args:
            id: forwarded to the base class and to both view builders.
            **kwargs: forwarded unchanged.

        Returns:
            list of JSON strings.
        """
        trace_events=Analysis.generate_trace_view_list(self,id,**kwargs)

        events=self.generate_sched_view_list(id,**kwargs)
        events.extend(self.generate_nic_view_list(id,**kwargs))
        events.extend(trace_events)

        return events
def splits(x):
    """Split each line on whitespace and pad the result to 118 fields.

    Args:
        x: iterable of text lines.

    Returns:
        iterator over lists of strings.  Lists shorter than 118 entries are
        padded with empty strings; longer ones are left untouched.
    """
    min_width=118
    rows=[]
    for line in x:
        fields=re.split(r'\s+',line)
        # A negative repeat count yields an empty list, so wide rows keep
        # their length.
        fields.extend(['']*(min_width-len(fields)))
        rows.append(fields)
    return iter(rows)
class Sar_analysis(Analysis):
    """Base class for analyses of one sar text report."""

    def __init__(self,sar_file):
        Analysis.__init__(self,sar_file)

    def load_data(self):
        """Load ``self.file`` into a Spark DataFrame with named columns.

        Lines are split with ``splits``; "Average:" rows, repeated header
        rows and the "Linux" banner row are removed.  When the report has no
        AM/PM column an empty ``_2`` column is inserted so that both layouts
        line up, and ``self.timeformat`` is set to match.  Column names are
        taken from the first header line of the file.  ``self.sarversion`` is
        read from ``sarv.txt`` next to the report ("" when it is absent).

        Returns:
            The cached DataFrame, also stored in ``self.df``.
        """
        sardata=sc.textFile(self.file)
        sardf=sardata.mapPartitions(splits).toDF()
        sardf=sardf.where("_1<>'Average:'")

        ampm=sardf.where("_2='AM' or _2='PM'").count()
        if ampm==0:
            # 24-hour layout: shift every column right by one, highest first,
            # then fill the freed _2 with an empty AM/PM marker.
            for i in range(len(sardf.columns),1,-1):
                sardf=sardf.withColumnRenamed(f'_{i}',f'_{i+1}')
            self.timeformat='yyyy-MM-dd HH:mm:ss '
            sardf=sardf.withColumn('_2',F.lit(''))
            colstart=1
        else:
            self.timeformat='yyyy-MM-dd hh:mm:ss a'
            colstart=2

        # The header is the first line after line 1 that is not just a
        # newline.  The handle is closed as soon as it has been read.
        with fs.open(self.file) as f:
            t=f.readline()
            t=f.readline()
            while len(t)==1:
                t=f.readline()
        cols=t.decode('ascii')
        li=re.split(r'\s+',cols)
        ci=3
        for c in li[colstart:]:
            sardf=sardf.withColumnRenamed(f"_{ci}",c)
            ci=ci+1

        # Drop header lines repeated inside the report and the banner line.
        sardf=sardf.where(F.col(li[-2])!=li[-2]).where(F.col("_1")!=F.lit("Linux"))

        sardf.cache()
        self.df=sardf

        self.sarversion=""
        paths=os.path.split(self.file)
        if fs.exists(paths[0]+"/sarv.txt"):
            with fs.open(paths[0]+"/sarv.txt") as f:
                allcnt = f.read().decode('ascii')
            # Third space-separated word of the first line.
            self.sarversion=allcnt.split("\n")[0].split(" ")[2]

        return sardf

    def col_df(self,cond,colname,args,slaver_id=0, thread_id=0):
        """Build counter ("C") trace events from the rows matching *cond*.

        Args:
            cond: filter passed to ``DataFrame.where``.
            colname: value of the ``name`` field of every event.
            args: callable that receives the aggregated DataFrame and returns
                the Column used as the ``args`` field.
            slaver_id: value of the ``pid`` field.
            thread_id: value of the ``tid`` field.

        Returns:
            list of JSON strings ordered by time.  ``ts`` is the sample time
            (date of ``self.starttime`` plus the report's time of day) in
            milliseconds, relative to ``self.starttime``.
        """
        sardf=self.df
        starttime=self.starttime
        cpudf=sardf.where(cond)

        cpudf=cpudf.withColumn('time',F.unix_timestamp(F.concat_ws(' ',F.date_format(F.from_unixtime(F.lit(starttime/1000)), 'yyyy-MM-dd'),F.col('_1'),F.col('_2')),self.timeformat))

        cols=cpudf.columns

        # Non-numeric values in the second and third columns count as 0; the
        # remaining named columns are summed as they are.
        numeric=r'^\d+(\.\d+)*$'
        cpudf=cpudf.groupBy('time').agg(
            F.sum(F.when(F.col(cols[1]).rlike(numeric),F.col(cols[1]).astype(FloatType())).otherwise(0)).alias(cols[1]),
            F.sum(F.when(F.col(cols[2]).rlike(numeric),F.col(cols[2]).astype(FloatType())).otherwise(0)).alias(cols[2]),
            *[F.sum(F.col(c)).alias(c) for c in cols[3:] if not c.startswith("_") and c!="" and c!="time"]
        )

        traces=cpudf.orderBy(F.col("time")).select(
            F.lit(thread_id).alias('tid'),
            (F.expr("time*1000")-F.lit(self.starttime)).astype(IntegerType()).alias('ts'),
            F.lit(slaver_id).alias('pid'),
            F.lit('C').alias('ph'),
            F.lit(colname).alias('name'),
            args(cpudf).alias('args')
        ).toJSON().collect()
        return traces

    def generate_trace_view_list(self,id,**kwargs):
        """Return the base-class trace events; subclasses append to them."""
        trace_events=Analysis.generate_trace_view_list(self,id, **kwargs)
        return trace_events

    def get_stat(self,**kwargs):
        """Make sure the report is loaded; returns None."""
        if self.df is None:
            self.load_data()
class Sar_mem_analysis(Sar_analysis):
    """Memory view over sar output: trace counters and summary statistics.

    Reads the ``kbmemfree``, ``kbmemused``, ``%memused``, ``kbcached``,
    ``kbbuffers`` and ``kbdirty`` columns of the loaded sar DataFrame.
    """

    def __init__(self, sar_file):
        Sar_analysis.__init__(self, sar_file)

    def load_data(self):
        """Load the sar data and add a ``kbrealused`` column.

        For sar versions 12.2 and later ``kbrealused`` is ``kbmemused``;
        for older versions ``kbcached`` and ``kbbuffers`` are subtracted.

        Returns:
            The DataFrame stored in ``self.df``.

        Raises:
            ValueError: If ``self.sarversion`` does not start with numeric
                ``major[.minor]`` components (for example when it is empty).
        """
        Sar_analysis.load_data(self)
        try:
            parts = [int(p) for p in self.sarversion.split(".")[:2]]
        except ValueError as err:
            raise ValueError(
                f"cannot parse sar version {self.sarversion!r} for {self.file}"
            ) from err
        major, minor = (parts + [0])[:2]
        # Compare as a tuple: testing major and minor separately rejects
        # releases such as 13.0 whose minor is below 2.
        if (major, minor) >= (12, 2):
            self.df = self.df.withColumn("kbrealused", F.col("kbmemused"))
        else:
            # sar 10.1.5, sar 11.6.1
            self.df = self.df.withColumn(
                "kbrealused",
                F.col("kbmemused") - F.col("kbcached") - F.col("kbbuffers"),
            )
        return self.df

    def generate_trace_view_list(self, id, **kwargs):
        """Return trace events for memory usage (tid 1) and page cache (tid 2).

        Args:
            id: Value used as the ``pid`` of every emitted event.
            **kwargs: Forwarded to ``Sar_analysis.generate_trace_view_list``.

        Returns:
            list[str]: JSON-encoded trace events.
        """
        trace_events = Sar_analysis.generate_trace_view_list(self, id, **kwargs)

        # Keep only data rows: repeated header lines have no digits here.
        data_rows = F.col('kbmemfree').rlike(r'^\d+$')

        def share(df, kilobytes):
            # kilobytes * %memused / kbmemused, i.e. kilobytes / (kbmemfree+kbmemused)
            return F.floor(kilobytes * df['%memused'] / df['kbmemused'])

        trace_events.extend(self.col_df(
            data_rows, "mem % ",
            lambda l: F.struct(
                share(l, l['kbcached']).alias('cached'),
                share(l, l['kbbuffers']).alias('buffered'),
                share(l, l['kbrealused']).alias('used')),
            id, 1))
        trace_events.extend(self.col_df(
            data_rows, "pagecache % ",
            lambda l: F.struct(
                share(l, l['kbcached'] - l['kbdirty']).alias('clean'),
                share(l, l['kbdirty']).alias('dirty')),
            id, 2))

        # The metadata key is "sort_index"; the former "sort_index " (with a
        # trailing space) does not match the Trace Event Format.
        for tid in (1, 2):
            trace_events.append(json.dumps(
                {"name": "thread_sort_index", "ph": "M", "pid": id, "tid": tid,
                 "args": {"sort_index": tid}}))
        return trace_events

    def get_stat(sar_mem, **kwargs):
        """Summarise cached and used memory as mean / 75% / max.

        Returns:
            pandas.DataFrame: Indexed by statistic label, with one column
            named after the parent directory of ``sar_mem.file``.
        """
        Sar_analysis.get_stat(sar_mem)

        def scaled(source, alias):
            value = F.col(source).astype(FloatType()) * F.lit(100) * F.col('%memused') / F.col('kbmemused')
            return F.floor(value).alias(alias)

        memutil = sar_mem.df.where(F.col('kbmemfree').rlike(r'^\d+$')).select(
            scaled('kbcached', 'cached'),
            scaled('kbbuffers', 'buffered'),
            scaled('kbrealused', 'used'),
            scaled('kbdirty', 'dirty'))
        memsum = memutil.summary().toPandas().set_index("summary")

        # NOTE(review): only "cached" and "used" were ever reported; "dirty"
        # was computed and then dropped. Confirm whether it should be added.
        out = [[name + ' ' + stat, float(memsum[name][stat])]
               for name in ("cached", "used")
               for stat in ("mean", "75%", "max")]
        pdout = pandas.DataFrame(out).set_index(0)
        pdout.columns = [sar_mem.file.split("/")[-2]]
        return pdout
\"M\",\"pid\":id,\"tid\":14,\"args\":{\"sort_index \":14}}))\n", + " trace_events.append(json.dumps({\"name\": \"thread_sort_index\",\"ph\": \"M\",\"pid\":id,\"tid\":15,\"args\":{\"sort_index \":15}}))\n", + " trace_events.append(json.dumps({\"name\": \"thread_sort_index\",\"ph\": \"M\",\"pid\":id,\"tid\":16,\"args\":{\"sort_index \":16}}))\n", + " return trace_events \n", + " def get_stat(sar_mem,**kwargs):\n", + " Sar_analysis.get_stat(sar_mem)\n", + " \n", + " memutil=sar_mem.df.where(F.col('pgpgin/s').rlike('^\\d')).select(F.floor(F.col('pgpgin/s').astype(FloatType())/1024).alias('pgin'), \n", + " F.floor(F.col('pgpgout/s').astype(FloatType())/1024).alias('pgout'),\n", + " F.floor(F.col('fault/s').astype(FloatType())-F.col('majflt/s').astype(FloatType())).alias('fault')\n", + " )\n", + " memsum=memutil.summary().toPandas()\n", + " memsum=memsum.set_index(\"summary\")\n", + " out=[\n", + " [[l + ' mean',float(memsum[l][\"mean\"])],\n", + " [l + ' 75%',float(memsum[l][\"75%\"])],\n", + " [l + ' max',float(memsum[l][\"max\"])]] for l in [\"pgin\",\"pgout\",\"fault\"]]\n", + " out=[*out[0],*out[1],*out[2]]\n", + " pdout=pandas.DataFrame(out).set_index(0)\n", + " pdout.columns=[sar_mem.file.split(\"/\")[-2]]\n", + " return pdout\n", + " \n", + " \n", + "class Sar_disk_analysis(Sar_analysis):\n", + " def __init__(self,sar_file):\n", + " Sar_analysis.__init__(self,sar_file)\n", + " \n", + " def load_data(self):\n", + " Sar_analysis.load_data(self)\n", + " \n", + " self.df=self.df.withColumn(\"%util\",F.col(\"%util\").astype(IntegerType()))\n", + " used_disk=self.df.groupBy(\"DEV\").agg(F.max(F.col(\"%util\")).alias(\"max_util\"),F.mean(\"%util\").alias(\"avg_util\")).where(F.col(\"max_util\")>10).collect()\n", + " self.df=self.df.where(F.col(\"DEV\").isin([l['DEV'] for l in used_disk]))\n", + " #print(\"used disks with its max util% and avg util% are: \")\n", + " #display([(l['DEV'],l[\"max_util\"],l[\"avg_util\"]) for l in used_disk])\n", + " \n", + " if \"rd_sec/s\" 
in self.df.columns:\n", + " self.df=self.df.withColumn(\"rkB/s\",F.expr('cast(`rd_sec/s` as float)*512/1024'))\n", + " if \"wr_sec/s\" in self.df.columns:\n", + " self.df=self.df.withColumn(\"wkB/s\",F.expr('cast(`wr_sec/s` as float)*512/1024'))\n", + " \n", + " if \"areq-sz\" in self.df.columns:\n", + " self.df=self.df.withColumnRenamed(\"areq-sz\",\"avgrq-sz\")\n", + " if \"aqu-sz\" in self.df.columns:\n", + " self.df=self.df.withColumnRenamed(\"aqu-sz\",\"avgqu-sz\")\n", + " \n", + " if \"rkB/s\" in self.df.columns:\n", + " self.df=self.df.withColumn(\"rkB/s\",F.expr('cast(`rkB/s` as float)/1024'))\n", + " if \"wkB/s\" in self.df.columns:\n", + " self.df=self.df.withColumn(\"wkB/s\",F.expr('cast(`wkB/s` as float)/1024'))\n", + "\n", + " def generate_trace_view_list(self,id,**kwargs):\n", + " trace_events=Sar_analysis.generate_trace_view_list(self,id, **kwargs)\n", + "\n", + " disk_prefix=kwargs.get('disk_prefix',\"\")\n", + " \n", + " if type(disk_prefix)==str:\n", + " diskfilter = \"DEV like '\"+disk_prefix+\"%'\"\n", + " elif type(disk_prefix)==list:\n", + " diskfilter = \"DEV in (\"+\",\".join(disk_prefix)+\")\"\n", + " else:\n", + " diskfilter = \"DEV like '%'\"\n", + "\n", + " print(diskfilter)\n", + " devcnt=self.df.where(diskfilter).select(\"DEV\").distinct().count()\n", + " \n", + " trace_events.extend(self.col_df(diskfilter, \"disk b/w\", lambda l: F.struct(\n", + " F.floor(F.col(\"rKB/s\")).alias('read'),\n", + " F.floor(F.col(\"wKB/s\")).alias('write')),id, 3))\n", + " trace_events.extend(self.col_df(diskfilter, \"disk%\", lambda l: F.struct(\n", + " (F.col(\"%util\")/F.lit(devcnt)).alias('%util')),id, 4))\n", + " trace_events.extend(self.col_df(diskfilter, \"req size\", lambda l: F.struct(\n", + " (F.col(\"avgrq-sz\")/F.lit(devcnt)).alias('avgrq-sz')),id, 5))\n", + " trace_events.extend(self.col_df(diskfilter, \"queue size\", lambda l: F.struct(\n", + " (F.col(\"avgqu-sz\")/F.lit(512*devcnt/1024)).alias('avgqu-sz')),id, 6))\n", + " 
def get_stat(sar_disk, **kwargs):
    """Summarise disk utilisation and bandwidth for the selected devices.

    Args:
        **kwargs: ``disk_prefix`` selects devices. A string matches
            ``DEV like '<prefix>%'``; a list is joined verbatim into
            ``DEV in (...)`` so its items must already be SQL-quoted; any
            other type selects every device. Defaults to ``""``.

    Returns:
        pandas.DataFrame: Indexed by statistic label, with one column named
        after the parent directory of ``sar_disk.file``.
    """
    Sar_analysis.get_stat(sar_disk)
    disk_prefix = kwargs.get('disk_prefix', "")

    if isinstance(disk_prefix, str):
        diskfilter = "DEV like '" + disk_prefix + "%'"
    elif isinstance(disk_prefix, list):
        diskfilter = "DEV in (" + ",".join(disk_prefix) + ")"
    else:
        diskfilter = "DEV like '%'"

    selected = sar_disk.df.where(diskfilter)

    # Mean %util across the selected devices for every sample time (`_1`).
    diskutil = selected.groupBy("_1").agg(
        F.mean(F.col("%util").astype(FloatType())).alias("%util")).orderBy("_1")
    totalcnt = diskutil.count()
    time_morethan_90 = diskutil.where(F.col("%util") > 90).count() / totalcnt
    avgutil = diskutil.agg(F.mean("%util")).collect()
    out = [["avg disk util", avgutil[0]["avg(%util)"]],
           ["time more than 90%", time_morethan_90]]

    # Bandwidth summed over the selected devices per sample time.
    diskbw = selected.groupBy("_1").agg(
        F.sum(F.col("rKB/s")).alias("rd_bw"),
        F.sum(F.col("wKB/s")).alias("wr_bw"))
    bw = diskbw.agg(
        F.sum("rd_bw").alias("total read"),
        F.sum("wr_bw").alias("total write"),
        F.mean("rd_bw").alias("read bw"),
        F.mean("wr_bw").alias("write bw")).collect()
    rdstat, wrstat = diskbw.stat.approxQuantile(['rd_bw', 'wr_bw'], [0.75, 0.95, 0.99], 0.0)
    time_rd_morethan_95 = diskbw.where(F.col("rd_bw") > rdstat[1]).count() / totalcnt
    # Compare writes with the WRITE p95; this used the read quantile before.
    time_wr_morethan_95 = diskbw.where(F.col("wr_bw") > wrstat[1]).count() / totalcnt

    # NOTE(review): the "... bw max" rows carry the 0.99 quantile, not the
    # true maximum. Kept as-is; confirm which one is intended.
    out.extend([
        ['total read (G)', bw[0]["total read"] / 1024],
        ['total write (G)', bw[0]["total write"] / 1024],
        ['avg read bw (MB/s)', bw[0]["read bw"]],
        ['avg write bw (MB/s)', bw[0]["write bw"]],
        ['read bw %75', rdstat[0]],
        ['read bw %95', rdstat[1]],
        ['read bw max', rdstat[2]],
        ['time_rd_morethan_95', time_rd_morethan_95],
        ['write bw %75', wrstat[0]],
        ['write bw %95', wrstat[1]],
        ['write bw max', wrstat[2]],
        ['time_wr_morethan_95', time_wr_morethan_95],
    ])
    pdout = pandas.DataFrame(out).set_index(0)
    pdout.columns = [sar_disk.file.split("/")[-2]]
    return pdout
def get_stat(sar_nic, **kwargs):
    """Report the 75th/95th/99th percentile of receive bandwidth in MB/s.

    Args:
        **kwargs: When ``nic_prefix`` is present its items are joined
            verbatim into ``IFACE in (...)``; otherwise every interface
            except ``lo`` is used.

    Returns:
        pandas.DataFrame: Indexed by statistic label, with one column named
        after the parent directory of ``sar_nic.file``. All three values are
        0 when no row matches the filter.
    """
    Sar_analysis.get_stat(sar_nic)

    if 'nic_prefix' in kwargs:
        nicfilter = "IFACE in (" + ",".join(kwargs['nic_prefix']) + ")"
    else:
        nicfilter = "IFACE != 'lo'"

    # Receive rate summed over the selected interfaces per sample time.
    rx_mb = F.col("rxkB/s").astype(FloatType()) / 1024
    per_sample = sar_nic.df.where(nicfilter).groupBy("_1")
    nicbw = per_sample.agg(F.sum(rx_mb).alias("rx MB/s")).orderBy("_1")

    labels = ["rx MB/s 75%", "rx MB/s 95%", "rx MB/s 99%"]
    if nicbw.count() == 0:
        values = [0, 0, 0]
    else:
        values = nicbw.stat.approxQuantile(['rx MB/s'], [0.75, 0.95, 0.99], 0.0)[0]
    out = [[labels[i], values[i]] for i in range(3)]

    pdout = pandas.DataFrame(out).set_index(0)
    pdout.columns = [sar_nic.file.split("/")[-2]]
    return pdout
"execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "class Pidstat_analysis(Analysis):\n", + " def __init__(self,sar_file):\n", + " Analysis.__init__(self,sar_file)\n", + " \n", + " def load_data(self):\n", + " sardata=sc.textFile(self.file)\n", + " sardf=sardata.mapPartitions(splits).toDF()\n", + " sardf=sardf.where(\"_1<>'Average:'\")\n", + " \n", + " headers=sardf.where(\"_4='TID' or _5='TID'\").limit(1).collect()\n", + " r=headers[0].asDict()\n", + " findtime=False\n", + " for i,v in r.items():\n", + " if(v==\"Time\"):\n", + " findtime=True\n", + " if not findtime:\n", + " r[\"_1\"]=\"Time\"\n", + " for i,v in r.items():\n", + " if(v!=\"\"):\n", + " sardf=sardf.withColumnRenamed(i,v)\n", + " sardf=sardf.where(\"TGID='0' or TGID='-'\") \n", + "\n", + " self.df=sardf\n", + " return sardf\n", + "\n", + "\n", + " def generate_trace_view_list(self,id,**kwargs):\n", + " trace_list=Analysis.generate_trace_view_list(self,id,**kwargs)\n", + " sardf=self.df\n", + " starttime=self.starttime\n", + " \n", + " sardf=sardf.withColumn(\"%CPU\",F.col(\"%CPU\").astype(FloatType()))\n", + " sardf=sardf.withColumn(\"Time\",F.col(\"Time\").astype(LongType()))\n", + " sardf=sardf.withColumn(\"TID\",F.col(\"TID\").astype(LongType()))\n", + " hotthreads=sardf.where(\"`%CPU`>30\").groupBy(\"TID\").count().collect()\n", + " hts=[(r[0],r[1]) for r in hotthreads]\n", + " htc=[r[1] for r in hotthreads]\n", + " if len(htc)==0:\n", + " return trace_list\n", + " maxcnt=max(htc)\n", + " hts=[r[0] for r in hts if r[1]>maxcnt/2]\n", + " tdfs=list(map(lambda x: sardf.withColumnRenamed(\"TID\",\"TID_\"+str(x)).withColumnRenamed(\"%CPU\",\"CPU_\"+str(x)).where(F.col(\"TID\")==x).select(\"Time\",\"TID_\"+str(x),\"CPU_\"+str(x)),hts))\n", + " finaldf=reduce(lambda x,y: x.join(y,on=[\"Time\"]),tdfs)\n", + " othersdf=sardf.where(\"TID not in (\"+\",\".join(map(lambda x: 
def load_data(self):
    """Load perf-stat interval output into ``self.df``.

    Reads three side files from the directory of ``self.file``:
    ``perfstarttime`` (a "# started on ..." line), ``tsc_freq`` and
    ``totalcores`` (one integer each, stored on ``self``). If any of them
    is missing a message is printed and ``None`` is returned.

    Returns:
        The cached DataFrame with columns ``ts`` (the parsed start time in
        epoch milliseconds plus ``_2`` * 1000), ``_3`` (thousands separators
        removed, cast to long) and ``_4``; or ``None`` when a side file is
        missing.
    """
    raw = sc.textFile(self.file)
    frame = raw.mapPartitions(splits).toDF()

    base_dir = os.path.split(self.file)[0]

    start_file = base_dir + "/perfstarttime"
    if not fs.exists(start_file):
        print("error, perfstarttime not found")
        return
    with fs.open(start_file) as f:
        started_on = f.read().decode('ascii')

    # tsc_freq and totalcores are read the same way, in this order.
    for attr in ('tsc_freq', 'totalcores'):
        value_file = os.path.join(base_dir, attr)
        if not fs.exists(value_file):
            print(f'{value_file} not exists')
            return
        setattr(self, attr, int(spark.read.text(value_file).collect()[0][0]))

    started_on = started_on[len("# started on "):].strip()
    epoch_ms = datetime.strptime(started_on, "%a %b %d %H:%M:%S %Y").timestamp() * 1000

    frame = frame.where("_1<>'#'")
    frame = frame.withColumn("ts", F.col("_2").astype(DoubleType()) * 1000 + F.lit(epoch_ms))
    frame = frame.where("ts is not null").select("ts", "_3", "_4")
    frame = frame.withColumn('_3', F.regexp_replace('_3', ',', '').astype(LongType()))
    frame = frame.cache()
    self.df = frame
    return frame
+ "\n", + " split_data = filtered_data.rdd.map(lambda row: row[0].split()).map(\n", + " lambda parts: (float(parts[0]), int(parts[1].replace(\",\", \"\")), parts[2], '' if len(parts) == 3 else parts[4])\n", + " )\n", + "\n", + " schema = [\"time\", \"counts\", \"events\", \"ipc\"]\n", + " df = split_data.toDF(schema)\n", + "\n", + " events_df = df.filter(col('ipc') == '')\n", + " ipc_df = df.filter(col('ipc') != '')\n", + "\n", + " instructions = ipc_df.select(_sum(col(\"counts\"))).collect()[0][0] / 1e9\n", + " avg_ipc = ipc_df.select(avg(col(\"ipc\"))).collect()[0][0]\n", + "\n", + " df_ccu_ref_tsc = events_df.select(col('time'), col('counts')).filter(col('events') == 'cpu_clk_unhalted.ref_tsc').withColumnRenamed('counts', 'cpu_clk_unhalted_ref_tsc')\n", + " df_ccu_thread = events_df.select(col('time'), col('counts')).filter(col('events') == 'cpu_clk_unhalted.thread').withColumnRenamed('counts', 'cpu_clk_unhalted_thread')\n", + "\n", + " window_spec = Window.orderBy(\"time\")\n", + " df_ccu_ref_tsc = df_ccu_ref_tsc.withColumn(\"prev_time\", lag(\"time\").over(window_spec))\n", + " df_ccu_ref_tsc = df_ccu_ref_tsc.withColumn(\"prev_time\", when(col(\"prev_time\").isNull(), 0).otherwise(col(\"prev_time\")))\n", + " df_ccu_ref_tsc = df_ccu_ref_tsc.withColumn(\"tsc\", (col(\"time\") - col(\"prev_time\")) * self.tsc_freq)\n", + "\n", + " joined_df = df_ccu_ref_tsc.join(df_ccu_thread, on=[\"time\"], how=\"inner\")\n", + " cpu_freq_df = joined_df.withColumn(\"freq\", joined_df.cpu_clk_unhalted_thread / joined_df.cpu_clk_unhalted_ref_tsc * self.tsc_freq / 1e9)\n", + " cpu_freq = cpu_freq_df.select(avg(col('freq'))).collect()[0][0]\n", + "\n", + " cpu_util_df = df_ccu_ref_tsc.withColumn(\"cpu%\", col(\"cpu_clk_unhalted_ref_tsc\") / col(\"tsc\") / self.totalcores * 100)\n", + " cpu_util = cpu_util_df.select(avg(col('cpu%'))).collect()[0][0]\n", + "\n", + " out = [['ipc', avg_ipc], ['instructions', instructions], ['cpu_freq', cpu_freq], ['cpu%', cpu_util]]\n", + " 
class gpu_analysis(Analysis):
    """Trace view over a GPU CSV log.

    Reads the ``timestamp``, `` index``, `` utilization.gpu [%]``,
    `` utilization.memory [%]`` and `` memory.used [MiB]`` columns.
    """

    def __init__(self, gpu_file):
        Analysis.__init__(self, gpu_file)

    def load_data(self):
        """Read the CSV and normalise it into ``self.df``.

        ``timestamp`` becomes epoch milliseconds (the parsed seconds * 1000
        plus the integer after the '.'); the three metric columns are renamed
        to ``gpu_util``, ``mem_util`` and ``mem_used`` and reduced to the
        integer that follows the leading space.

        Returns:
            The normalised DataFrame.
        """
        raw = (spark.read.format("com.databricks.spark.csv")
               .option("header", "true")
               .option("mode", "DROPMALFORMED")
               .option("delimiter", ",")
               .load(self.file))
        # Raw string: '\.' is an invalid escape in a normal literal.
        fraction = (F.split(F.col('timestamp'), r'\.')[1]).astype(IntegerType())
        df = raw.withColumn(
            'timestamp',
            F.unix_timestamp(F.col('timestamp'), 'yyyy/MM/dd HH:mm:ss') * 1000 + fraction)
        df = df.withColumnRenamed(' utilization.gpu [%]', 'gpu_util')
        df = df.withColumnRenamed(' utilization.memory [%]', 'mem_util')
        df = df.withColumnRenamed(' memory.used [MiB]', 'mem_used')
        df = df.withColumnRenamed(' index', 'index')
        for metric in ('gpu_util', 'mem_util', 'mem_used'):
            df = df.withColumn(metric, (F.split(metric, ' ')[1]).astype(IntegerType()))
        # Cache the normalised frame: it is the one collected twice later.
        df.cache()
        self.df = df
        return df

    def generate_trace_view_list(self, id, **kwargs):
        """Return counter events for GPU utilisation and memory use.

        Args:
            id: ``pid`` of the utilisation events; memory events use
                ``int(id) + 1``.
            **kwargs: ``gpu_mem_mib`` overrides the divisor applied to
                ``mem_used`` (default 32768, the former hard-coded value;
                presumably the card's memory size in MiB, to be confirmed).

        Returns:
            list[str]: JSON-encoded trace events.
        """
        Analysis.generate_trace_view_list(self, id)

        gpu_mem_mib = kwargs.get('gpu_mem_mib', 32768)
        ordered = self.df.orderBy(self.df['timestamp'])
        ts = (F.expr("timestamp") - F.lit(self.starttime)).astype(IntegerType()).alias('ts')

        def counters(pid, prefix, value):
            # One counter event per row; tid and name carry the GPU index.
            return ordered.select(
                F.col('index').alias('tid'),
                ts,
                F.lit(pid).alias('pid'),
                F.lit('C').alias('ph'),
                F.concat(F.lit(prefix), F.col('index')).alias('name'),
                F.struct(value).alias('args')
            ).toJSON().collect()

        trace_events = []
        trace_events.extend(counters(id, 'gpu_util_', F.col('gpu_util').alias('gpu')))
        trace_events.extend(counters(
            int(id) + 1, 'mem_util_',
            (F.col('mem_used') / F.lit(gpu_mem_mib)).alias('mem')))
        return trace_events
def split_dask(x):
    """Parse dask task-log lines into field lists for ``RDD.mapPartitions``.

    Two line shapes are handled:

    * ``('<name>-<hex>', <n>),f1,f2,...`` yields
      ``[name, f1, f2, ..., n]``.
    * ``<name>-<id>,f1,f2,...`` where ``<id>`` is five hyphen-separated hex
      groups or a single hex run yields ``[name, f1, f2, ..., id]``.

    Lines whose first field matches neither shape are reported on stdout
    and skipped. Previously the tuple form returned ``None`` for the whole
    partition, which is not iterable, and the plain form raised
    ``AttributeError``.

    Args:
        x: Iterable of log lines.

    Yields:
        list[str]: The parsed fields of one line.
    """
    for line in x:
        if line.startswith('('):
            parts = re.split(r'[()]', line)
            key = re.search(r"'(.*)-([0-9a-f]+)', *(\d+)", parts[1])
            # parts[2] is missing when the closing parenthesis is absent.
            if not key or len(parts) < 3:
                print("dask log first field doesn't match (.*)-[0-9a-f]+', *(\\d+)")
                continue
            row = [key.group(1)]
            row.extend(parts[2].split(",")[1:])
            row.append(key.group(3))
        else:
            row = line.split(',')
            # Try the five-group id first, then a single hex suffix.
            key = (re.search(r"(.*)-([0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+-[0-9a-f]+)$", row[0])
                   or re.search(r"(.*)-([0-9a-f]+)$", row[0]))
            if not key:
                print("dask log first field doesn't match (.*)-[0-9a-f]+$")
                continue
            row[0] = key.group(1)
            row.append(key.group(2))
        yield row
class dask_analysis_log(dask_analysis):
    """``dask_analysis`` variant that is constructed together with ``logs``.

    ``load_data`` and ``generate_trace_view_list`` are inherited from
    ``dask_analysis``; this class used to carry copy-pasted overrides that were
    identical to the parent's methods, so removing them changes no behaviour.
    """

    def __init__(self, dask_file, logs):
        """Remember the dask log path and the accompanying ``logs`` value.

        Args:
            dask_file: path of the dask log handed to the base ``Analysis``.
            logs: caller-supplied log handle/paths.  It used to be accepted
                and silently discarded; it is now kept on ``self.logs``.
        """
        dask_analysis.__init__(self, dask_file)
        self.logs = logs


## format: _2 = Name; _3 = time

class InstantEvent_analysis(Analysis):
    """Emits one global instant ('i') trace event per row of an event file."""

    def __init__(self, sar_file):
        Analysis.__init__(self, sar_file)

    def load_data(self):
        """Read ``self.file`` as text, split it with ``splits`` and cache the
        resulting DataFrame on ``self.df``."""
        events = sc.textFile(self.file).mapPartitions(splits).toDF()
        self.df = events
        return events

    def generate_trace_view_list(self, id=0, **kwargs):
        """Return the events as JSON strings for the trace viewer.

        ``_3`` is scaled by 1000 and made relative to ``self.starttime``
        (which the caller assigns before invoking this method); ``_2`` becomes
        the event name.  Events are always emitted on pid 0 / tid 0 with
        global scope (``"s": "g"``); ``id`` is only forwarded to the base
        class.
        """
        Analysis.generate_trace_view_list(self, id)
        origin = self.starttime
        relative_ts = F.col("_3").astype(DoubleType()) * 1000 - F.lit(origin)
        markers = self.df.select(
            F.lit(0).alias('tid'),
            relative_ts.astype(IntegerType()).alias('ts'),
            F.lit(0).alias('pid'),
            F.lit('i').alias('ph'),
            F.col("_2").alias('name'),
            F.lit("g").alias("s"),
        )
        return markers.toJSON().collect()
class HBM_analysis(Analysis):
    """Counter tracks for HBM usage, read from a ", "-delimited CSV that has a
    header row with ``timestamp``, ``size`` and ``free`` columns."""

    def __init__(self, file):
        Analysis.__init__(self, file)

    def load_data(self):
        """Load the CSV and add ``ts`` (unix seconds of ``timestamp``) plus
        long-typed ``size`` / ``free`` columns; the result is kept on
        ``self.df`` and returned."""
        raw = (
            spark.read
            .option("delimiter", ", ")
            .option("header", "true")
            .csv(self.file)
        )
        stamped = raw.withColumn("ts", F.unix_timestamp(raw["timestamp"]))
        sized = stamped.withColumn("size", raw["size"].cast(LongType()))
        self.df = sized.withColumn("free", raw["free"].cast(LongType()))
        return self.df

    def generate_trace_view_list(self, id, **kwargs):
        """Append two counter ('C') series to the base class' event list.

        * ``hbm``   -- ``hbmused`` (size - free) and ``hbmfree``;
        * ``hbm %`` -- ``%hbmused`` = (1 - free / size) * 100.

        Timestamps are ``ts * 1000 - self.starttime``.
        """
        events = Analysis.generate_trace_view_list(self, id, **kwargs)
        frame = self.df
        relative_ts = (
            F.col("ts") * F.lit(1000) - F.lit(self.starttime)
        ).astype(LongType()).alias('ts')

        def counter_events(series_name, args_struct):
            # Every series shares tid/ts/pid/ph; only name and args differ.
            return frame.select(
                F.lit(0).alias('tid'),
                relative_ts,
                F.lit(id).alias('pid'),
                F.lit('C').alias('ph'),
                F.lit(series_name).alias('name'),
                args_struct.alias('args'),
            ).toJSON().collect()

        used = F.col("size") - F.col("free")
        events.extend(counter_events(
            "hbm",
            F.struct(used.alias('hbmused'), F.col("free").alias('hbmfree')),
        ))

        used_pct = (F.lit(1) - F.col("free") / F.col("size")) * F.lit(100)
        events.extend(counter_events(
            "hbm %",
            F.struct(used_pct.alias('%hbmused')),
        ))
        return events
class Dask_Application_Run:
    """Bundles the dask log with the sar / emon / gpu collectors of one run
    stored under ``/tmp/dgx-2Log/<appid>/`` and renders them into a single
    trace-viewer JSON file."""

    def __init__(self, appid):
        self.appid = appid
        self.filedir = "/tmp/dgx-2Log/" + self.appid + "/"
        run_dir = self.filedir

        # Insertion order and pids mirror the original literal:
        # dask -> 8000, then sar_cpu..gpu -> 0..5.
        self.analysis = {
            'dask': {'als': dask_analysis(run_dir + "cluster.log"), 'pid': 8000},
        }
        self.analysis['sar_cpu'] = {'als': Sar_cpu_analysis(run_dir + "/sar_cpu.sar"), 'pid': 0}
        self.analysis['sar_disk'] = {'als': Sar_disk_analysis(run_dir + "/sar_disk.sar"), 'pid': 1}
        self.analysis['sar_mem'] = {'als': Sar_mem_analysis(run_dir + "/sar_mem.sar"), 'pid': 2}
        self.analysis['sar_nic'] = {'als': Sar_nic_analysis(run_dir + "/sar_nic.sar"), 'pid': 3}
        self.analysis['emon'] = {'als': Emon_Analysis(run_dir + "/emon.rst"), 'pid': 4}
        self.analysis['gpu'] = {'als': gpu_analysis(run_dir + "/gpu.txt"), 'pid': 5}

    def generate_trace_view(self, showsar=True, showemon=False, showgpu=True, **kwargs):
        """Write ``/home/sparkuser/trace_result/<appid>.json`` and print its URL.

        The dask events are generated first; every enabled collector is then
        given the dask start time so all tracks share one time origin.
        ``kwargs`` are forwarded to each ``generate_trace_view_list`` call.
        """
        dask_entry = self.analysis['dask']
        daskals = dask_entry['als']
        traces = list(daskals.generate_trace_view_list(dask_entry['pid'], **kwargs))

        selected = []
        if showsar:
            selected.extend(['sar_cpu', 'sar_disk', 'sar_mem', 'sar_nic'])
        if showemon:
            selected.append('emon')
        if showgpu:
            selected.append('gpu')

        for key in selected:
            entry = self.analysis[key]
            collector = entry['als']
            collector.starttime = daskals.starttime
            traces.extend(collector.generate_trace_view_list(entry['pid'], **kwargs))

        header = '''
        {
            "traceEvents": [
        
        '''
        footer = '''
            ]
        }'''
        output = header + ",\n".join(traces) + footer

        with open('/home/sparkuser/trace_result/' + self.appid + '.json', 'w') as outfile:
            outfile.write(output)

        print("http://sr219:1088/tracing_examples/trace_viewer.html#/tracing/test_data/" + self.appid + ".json")
class Dask_Application_Run2:
    """Dask run backed by combined ``sar_data.sar`` / ``gpu_dmon.txt`` files.

    Inputs are read from ``/tmp/dgx-2Log/<appid>/``.  The dask, sar and gpu
    analyses are created eagerly; the emon analysis is created on first use.

    NOTE(review): this class calls ``generate_dask_trace_view`` /
    ``generate_sar_trace_view`` / ``generate_gpu_trace_view`` /
    ``generate_emon_trace_view`` on the analysis objects, while the analysis
    classes defined next to it expose ``generate_trace_view_list`` -- confirm
    those methods still exist before relying on this class.
    """

    def __init__(self, appid):
        self.appid = appid

        self.filedir = "/tmp/dgx-2Log/" + self.appid + "/"
        self.dask = self.load_dask()
        self.sar = self.load_sar()
        self.gpu = self.load_gpu()
        # Loaded lazily by _ensure_emon(); the attribute used to be read
        # without ever being assigned.
        self.emon = None

    def load_dask(self):
        return dask_analysis(self.filedir + "cluster.log")

    def load_sar(self):
        return Sar_analysis(self.filedir + "sar_data.sar")

    def load_emon(self):
        return Emon_Analysis(self.filedir + "emon.rst")

    def load_gpu(self):
        return gpu_dmon_analysis(self.filedir + "gpu_dmon.txt")

    def _ensure_emon(self):
        """Create the emon analysis the first time it is needed."""
        if getattr(self, 'emon', None) is None:
            self.emon = self.load_emon()
        return self.emon

    def generate_dask_trace_view(self):
        return self.dask.generate_dask_trace_view(8000)

    def generate_sar_trace_view(self):
        return self.sar.generate_sar_trace_view(0)

    def generate_gpu_trace_view(self):
        return self.gpu.generate_gpu_trace_view(1)

    def generate_emon_trace_view(self, collected_cores):
        return self._ensure_emon().generate_emon_trace_view(5, collected_cores)

    def generate_trace_view(self, showsar=True, showemon=False, showgpu=True, collected_cores=None):
        """Write ``/home/sparkuser/trace_result/<appid>.json`` and print its URL.

        Args:
            showsar / showemon / showgpu: which collectors to add on top of
                the dask events.
            collected_cores: forwarded to the emon analysis when ``showemon``
                is set.  It used to be read from an undefined local name, so
                it is now an explicit (optional, backward-compatible)
                parameter.

        Every collector is aligned to the dask start time.
        """
        traces = []
        traces.extend(self.generate_dask_trace_view())
        if showsar:
            self.sar.starttime = self.dask.starttime
            traces.extend(self.generate_sar_trace_view())
        if showemon:
            # Same time origin as the sar/gpu tracks.
            self._ensure_emon().starttime = self.dask.starttime
            traces.extend(self.generate_emon_trace_view(collected_cores))
        if showgpu:
            self.gpu.starttime = self.dask.starttime
            traces.extend(self.generate_gpu_trace_view())

        output = '''
        {
            "traceEvents": [
        
        ''' + \
        ",\n".join(traces)\
        + '''
            ]
        }'''

        with open('/home/sparkuser/trace_result/' + self.appid + '.json', 'w') as outfile:
            outfile.write(output)

        # `localhost` is a notebook-level variable; the app id must come from
        # the instance (a bare `appid` is not defined in this scope).
        print(f"http://{localhost}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{self.appid}.json")
open('/home/sparkuser/trace_result/'+self.appid+'.json', 'w') as outfile: \n", + " outfile.write(output)\n", + "\n", + " print(f\"http://{localhost}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{appid}.json\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# Application Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "class Application_Run:\n", + " def __init__(self, appid,**kwargs):\n", + " self.appid=appid\n", + " \n", + " basedir=kwargs.get(\"basedir\",\"skylake\")\n", + " self.filedir=\"/\"+basedir+\"/\"+self.appid+\"/\"\n", + " self.basedir=basedir\n", + " \n", + " slaves=fs.list_status(\"/\"+basedir+\"/\"+appid)\n", + " slaves=[f['pathSuffix'] for f in slaves if f['type']=='DIRECTORY' and f['pathSuffix']!=\"summary.parquet\"]\n", + " \n", + " jobids=kwargs.get(\"jobids\",None)\n", + " \n", + " self.clients=slaves\n", + " \n", + " sarclnt={}\n", + " for idx,l in enumerate(self.clients):\n", + " sarclnt[l]={'sar_cpu':{'als':Sar_cpu_analysis(self.filedir + l + \"/\"+\"sar_cpu.sar\"),'pid':idx},\n", + " 'sar_disk':{'als':Sar_disk_analysis(self.filedir + l + \"/\"+\"sar_disk.sar\"),'pid':idx},\n", + " 'sar_mem':{'als':Sar_mem_analysis(self.filedir + l + \"/\"+\"sar_mem.sar\"),'pid':idx},\n", + " 'sar_nic':{'als':Sar_nic_analysis(self.filedir + l + \"/\"+\"sar_nic.sar\"),'pid':idx}\n", + " }\n", + " if fs.exists(self.filedir + l + \"/sar_page.sar\"):\n", + " sarclnt[l]['sar_page']={'als':Sar_PageCache_analysis(self.filedir + l + \"/\"+\"sar_page.sar\"),'pid':idx}\n", + " \n", + " if fs.exists(self.filedir + l + \"/pidstat.out\"):\n", + " sarclnt[l]['sar_pid']={'als':Pidstat_analysis(self.filedir + l + \"/pidstat.out\"),'pid':idx}\n", + " if fs.exists(self.filedir + l + \"/sched.txt\"):\n", + " sarclnt[l]['sar_perf']={'als':Perf_trace_analysis(self.filedir + l + 
\"/sched.txt\"),'pid':100+idx}\n", + " if fs.exists(self.filedir + l + \"/emon.rst\"):\n", + " self.show_emon=True\n", + " sarclnt[l]['emon']={'als':Emon_Analysis(self.filedir + l + \"/emon.rst\"),'pid':200+idx}\n", + " if fs.exists(self.filedir + l + \"/perfstat.txt\"):\n", + " self.show_perfstat=True\n", + " sarclnt[l]['perfstat']={'als':Perfstat_analysis(self.filedir + l + \"/perfstat.txt\"),'pid':300+idx}\n", + " if fs.exists(self.filedir + l + \"/gpu.txt\"):\n", + " sarclnt[l]['gpu']={'als':gpu_analysis(self.filedir + l + \"/gpu.txt\"),'pid':400+idx}\n", + " \n", + " \n", + " self.analysis={\n", + " \"sar\": sarclnt\n", + " }\n", + " \n", + " if fs.exists(self.filedir+\"app.log\"):\n", + " self.analysis['app']={'als':App_Log_Analysis(self.filedir+\"app.log\",jobids)}\n", + " \n", + " if fs.exists(self.filedir+\"instevent.out\"):\n", + " self.analysis['instant']={'als':InstantEvent_analysis(self.filedir+\"instevent.out\")}\n", + " \n", + " self.starttime=0\n", + " if fs.exists(self.filedir+\"starttime\"):\n", + " with fs.open(self.filedir+\"starttime\") as f:\n", + " st = f.read().decode('ascii')\n", + " self.starttime=int(st)\n", + " \n", + " def generate_trace_view(self,showsar=True,showgpu=True,showhbm=False,**kwargs):\n", + " traces=[]\n", + " shownodes=kwargs.get(\"shownodes\",self.clients)\n", + " for l in shownodes:\n", + " if l not in self.clients:\n", + " print(l,\"is not in clients\",self.clients)\n", + " return\n", + " self.clients=shownodes\n", + " \n", + " xgbtcks=kwargs.get('xgbtcks',(\"calltrain\",'enter','begin','end'))\n", + " \n", + " if \"app\" in self.analysis:\n", + " appals=self.analysis['app']['als']\n", + " appals.starttime=self.starttime\n", + " traces.extend(appals.generate_trace_view_list(self.analysis['app'],**kwargs))\n", + " self.starttime=appals.starttime\n", + " \n", + " if 'instant' in self.analysis:\n", + " als=self.analysis['instant']['als']\n", + " als.starttime=self.starttime\n", + " 
traces.extend(als.generate_trace_view_list(**kwargs))\n", + " \n", + " counttime=kwargs.get(\"counttime\",False)\n", + " \n", + " pidmap={}\n", + " if showsar:\n", + " for l in self.clients:\n", + " for alskey, sarals in self.analysis[\"sar\"][l].items():\n", + " t1 = time.time()\n", + " if alskey!=\"emon\":\n", + " sarals['als'].starttime=self.starttime\n", + " traces.extend(sarals['als'].generate_trace_view_list(sarals['pid'],node=l, **kwargs))\n", + " elif self.show_emon:\n", + " sarals['als'].load_data()\n", + " pidmap[l]=sarals['pid']\n", + " if counttime:\n", + " print(l,alskey,\" spend time: \", time.time()-t1)\n", + " if self.show_emon:\n", + " t1 = time.time()\n", + " emondfs=get_emon_parquets([self.appid,],self.basedir)\n", + " emons=Emon_Analysis_All(emondfs)\n", + " emons.starttime=self.starttime\n", + " traces.extend(emons.generate_trace_view_list(0,pidmap=pidmap,**kwargs))\n", + " if counttime:\n", + " print(\"emon process spend time: \", time.time()-t1)\n", + " self.emons=emons\n", + " \n", + " if showhbm:\n", + " for l in self.clients:\n", + " t1 = time.time()\n", + " hbm_analysis=HBM_analysis(self.filedir + l + \"/numactl.csv\")\n", + " hbm_analysis.starttime=self.starttime\n", + " traces.extend(hbm_analysis.generate_trace_view_list(0,**kwargs))\n", + " if counttime:\n", + " print(l, \" hbm process spend time: \", time.time()-t1)\n", + " \n", + " for idx,l in enumerate(self.clients):\n", + " traces.append(json.dumps({\"name\": \"process_sort_index\",\"ph\": \"M\",\"pid\":idx,\"tid\":0,\"args\":{\"sort_index \":idx}}))\n", + " traces.append(json.dumps({\"name\": \"process_sort_index\",\"ph\": \"M\",\"pid\":idx+100,\"tid\":0,\"args\":{\"sort_index \":idx+100}}))\n", + " traces.append(json.dumps({\"name\": \"process_sort_index\",\"ph\": \"M\",\"pid\":idx+200,\"tid\":0,\"args\":{\"sort_index \":idx+200}}))\n", + " \n", + " if \"app\" in self.analysis:\n", + " for pid in self.analysis['app']['als'].pids:\n", + " traces.append(json.dumps({\"name\": 
\"process_sort_index\",\"ph\": \"M\",\"pid\":pid+200,\"tid\":0,\"args\":{\"sort_index \":pid+200}}))\n", + "\n", + " allcnt=\"\"\n", + " for c in self.clients:\n", + " paths=self.filedir+c\n", + " if fs.exists(paths+\"/xgbtck.txt\"):\n", + " with fs.open(paths+\"/xgbtck.txt\") as f:\n", + " tmp = f.read().decode('ascii')\n", + " allcnt=allcnt+tmp\n", + " allcnt=allcnt.strip().split(\"\\n\")\n", + " if len(allcnt) > 1:\n", + " allcnt=[l.split(\" \") for l in allcnt]\n", + " cnts=pandas.DataFrame([[l[0],l[1],l[2],l[3]] for l in allcnt if len(l)>1 and l[1] in xgbtcks])\n", + " if len(cnts) > 0:\n", + " cnts.columns=['xgbtck','name','rank','time']\n", + " cntgs=cnts.groupby(\"name\").agg({\"time\":\"min\"})\n", + " cntgs=cntgs.reset_index()\n", + " cntgs.columns=['name','ts']\n", + " cntgs['ph']=\"i\"\n", + " cntgs['ts']=pandas.to_numeric(cntgs['ts'])-self.starttime\n", + " cntgs['pid']=0\n", + " cntgs['tid']=0\n", + " cntgs['s']='g'\n", + " traces.extend([json.dumps(l) for l in cntgs.to_dict(orient='records')])\n", + " \n", + " output='''\n", + " {\n", + " \"traceEvents\": [\n", + " \n", + " ''' + \\\n", + " \",\\n\".join(traces)\\\n", + " + '''\n", + " ],\n", + " \"displayTimeUnit\": \"ns\"\n", + " }'''\n", + "\n", + " with open('/home/sparkuser/trace_result/'+self.appid+'.json', 'w') as outfile: \n", + " outfile.write(output)\n", + " \n", + " traceview_link=f'http://{localhost}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{self.appid}.json'\n", + " display(HTML(f\"{traceview_link}\"))\n", + " return traceview_link\n", + "\n", + " def getemonmetric(app,**kwargs):\n", + " emondfs=get_emon_parquets([app.appid],app.basedir)\n", + " emons=Emon_Analysis_All(emondfs)\n", + " metric_msg_map={\n", + " 'emon_instr_retired':F.sum\n", + " }\n", + " \n", + " emonmetric=kwargs.get(\"show_metric\",None)\n", + "\n", + " outdf=None\n", + " for k in emonmetric:\n", + " m=emons.emon_metrics[k]\n", + " for fk,fm in m['formula'].items():\n", + " if k not in 
metric_msg_map:\n", + " metric_msg_map[k]=F.avg\n", + " df=emons.gen_reduce_metric(k,list(range(0,emons.totalcores)),fk,metric_msg_map[k])\n", + " tmpdf=df.groupBy(\"appid\",'client').agg(*[l(\"`{:s}`\".format(fk)).alias(get_alias_name(fk,l)) for l in [metric_msg_map[k]]]).toPandas()\n", + " tmpdf=tmpdf.set_index(\"client\").drop(columns=['appid']).T\n", + " if outdf is None:\n", + " outdf=tmpdf\n", + " else:\n", + " outdf=outdf.append(tmpdf)\n", + " pandas.options.display.float_format = '{:,.2f}'.format\n", + " return outdf\n", + " \n", + " def get_sar_stat(app,**kwargs):\n", + " disk_prefix=kwargs.get(\"disk_prefix\",\"dev259\")\n", + " nic_prefix = kwargs.get(\"nic_prefix\",[\"'eth3'\",\"'enp24s0f1'\"])\n", + " cpustat=[app.analysis[\"sar\"][l]['sar_cpu']['als'].get_stat() for l in app.clients]\n", + " cpustat=reduce(lambda l,r:l.join(r),cpustat)\n", + " diskstat=[app.analysis[\"sar\"][l]['sar_disk']['als'].get_stat(disk_prefix=disk_prefix) for l in app.clients]\n", + " diskstat=reduce(lambda l,r:l.join(r),diskstat)\n", + " memstat=[app.analysis[\"sar\"][l]['sar_mem']['als'].get_stat() for l in app.clients]\n", + " memstat=reduce(lambda l,r:l.join(r),memstat)\n", + " nicstat=[app.analysis[\"sar\"][l]['sar_nic']['als'].get_stat(nic_prefix=nic_prefix) for l in app.clients]\n", + " nicstat=reduce(lambda l,r:l.join(r),nicstat)\n", + " pagestat=[app.analysis[\"sar\"][l]['sar_page']['als'].get_stat() for l in app.clients]\n", + " pagestat=reduce(lambda l,r:l.join(r),pagestat)\n", + " pandas.options.display.float_format = '{:,.2f}'.format\n", + " return pandas.concat([cpustat,diskstat,memstat,nicstat,pagestat])\n", + " \n", + " def get_perf_stat(self, **kwargs):\n", + " perfstat=[self.analysis[\"sar\"][l]['perfstat']['als'].get_stat() for l in self.clients]\n", + " return reduce(lambda l,r: l.join(r), perfstat)\n", + " \n", + " def get_summary(app, **kwargs):\n", + " output=[]\n", + " \n", + " appals=app.analysis[\"app\"][\"als\"]\n", + " \n", + " 
out=appals.get_query_time(plot=False)\n", + " \n", + " lrun=app.appid\n", + " \n", + " cmpcolumns=['runtime','disk spilled','shuffle_write','f_wait_time','input read','acc_task_time','output rows']\n", + " outcut=out[cmpcolumns]\n", + " \n", + " pdsout=pandas.DataFrame(outcut.sum(),columns=[lrun])\n", + " pdstime=pdsout \n", + "\n", + " if app.show_emon:\n", + " emondf=app.getemonmetric(**kwargs)\n", + " def get_agg(emondf):\n", + " aggs=[]\n", + " for x in emondf.index:\n", + " if x.endswith(\"avg\"):\n", + " aggs.append(emondf.loc[x].mean())\n", + " else:\n", + " aggs.append(emondf.loc[x].sum())\n", + "\n", + " emondf['agg']=aggs\n", + " return emondf\n", + " emondf=get_agg(emondf)\n", + "\n", + " emonsum=emondf[[\"agg\"]]\n", + "\n", + " emonsum.columns=[lrun]\n", + "\n", + " print(\"sar metric\")\n", + " sardf=app.get_sar_stat(**kwargs)\n", + " \n", + " def get_sar_agg(sardf):\n", + " aggs=[]\n", + " for x in sardf.index:\n", + " if \"total\" in x:\n", + " aggs.append(sardf.loc[x].sum())\n", + " elif \"max\" in x:\n", + " aggs.append(sardf.loc[x].max())\n", + " else:\n", + " aggs.append(sardf.loc[x].mean())\n", + "\n", + " sardf['agg']=aggs\n", + " return sardf\n", + " sardf=get_sar_agg(sardf)\n", + "\n", + " sarsum=sardf[[\"agg\"]]\n", + "\n", + " sarsum.columns=[lrun]\n", + " \n", + " summary=pandas.concat([pdstime,sarsum])\n", + " if app.show_emon:\n", + " summary=pandas.concat([summary,emonsum])\n", + " elif app.show_perfstat:\n", + " print(\"perf stat metric\")\n", + " perf_stat = app.get_perf_stat(**kwargs)\n", + " perf_stat = get_sar_agg(perf_stat)[['agg']]\n", + " perf_stat.columns=[lrun]\n", + " summary=pandas.concat([summary,perf_stat])\n", + " \n", + " df_sum=spark.createDataFrame(summary.T.reset_index())\n", + " for c in df_sum.columns:\n", + " df_sum=df_sum.withColumnRenamed(c,c.replace(\" \",\"_\").replace(\"(\",\"\").replace(\")\",\"\"))\n", + " df_sum.write.mode(\"overwrite\").parquet(app.filedir+\"summary.parquet\")\n", + " \n", + " return 
summary\n", + " \n", + " def compare_app(app2,**kwargs):\n", + " output=[]\n", + " \n", + " lbasedir=kwargs.get(\"basedir\",app2.basedir)\n", + " r_appid=kwargs.get(\"r_appid\",app2.appid)\n", + " \n", + " app=kwargs.get(\"rapp\",Application_Run(r_appid,basedir=lbasedir))\n", + "\n", + " show_queryplan_diff=kwargs.get(\"show_queryplan_diff\",True)\n", + " \n", + " queryids=kwargs.get(\"queryids\",None)\n", + " \n", + " appals=app.analysis[\"app\"][\"als\"]\n", + " appals2=app2.analysis[\"app\"][\"als\"]\n", + "\n", + " out=appals.get_query_time(plot=False)\n", + " out2=appals2.get_query_time(plot=False)\n", + "\n", + " lrun=app.appid\n", + " rrun=app2.appid\n", + " cmpcolumns=['runtime','shuffle_write','f_wait_time','input read','acc_task_time','output rows']\n", + " outcut=out[cmpcolumns]\n", + " out2cut=out2[cmpcolumns]\n", + " cmp=outcut.join(out2cut,lsuffix='_'+lrun,rsuffix='_'+rrun)\n", + "\n", + " pdsout=pandas.DataFrame(outcut.sum(),columns=[lrun])\n", + " pdsout2=pandas.DataFrame(out2cut.sum(),columns=[rrun])\n", + " pdstime=pdsout.join(pdsout2)\n", + "\n", + " showemon=app.show_emon and app2.show_emon\n", + " if showemon:\n", + " print(\"emon metric\")\n", + "\n", + " emondf=app.getemonmetric(**kwargs)\n", + " emondf2=app2.getemonmetric(**kwargs)\n", + " #in case we comare with two clsuter\n", + " emondf.columns=emondf2.columns\n", + " def get_agg(emondf):\n", + " aggs=[]\n", + " for x in emondf.index:\n", + " if x.endswith(\"avg\"):\n", + " aggs.append(emondf.loc[x].mean())\n", + " else:\n", + " aggs.append(emondf.loc[x].sum())\n", + "\n", + " emondf['agg']=aggs\n", + " return emondf\n", + " emondf=get_agg(emondf)\n", + " emondf2=get_agg(emondf2)\n", + "\n", + " emoncolumns=emondf.columns\n", + " emoncmp=emondf.join(emondf2,lsuffix='_'+lrun,rsuffix='_'+rrun)\n", + " emonsum=emoncmp[[\"agg_\"+lrun,\"agg_\"+rrun]]\n", + "\n", + " emonsum.columns=[lrun,rrun]\n", + "\n", + " print(\"sar metric\")\n", + " sardf=app.get_sar_stat(**kwargs)\n", + " 
sardf2=app2.get_sar_stat(**kwargs)\n", + " \n", + " def get_sar_agg(sardf):\n", + " aggs=[]\n", + " for x in sardf.index:\n", + " if \"total\" in x:\n", + " aggs.append(sardf.loc[x].sum())\n", + " elif \"max\" in x:\n", + " aggs.append(sardf.loc[x].max())\n", + " else:\n", + " aggs.append(sardf.loc[x].mean())\n", + "\n", + " sardf['agg']=aggs\n", + " return sardf\n", + " sardf=get_sar_agg(sardf)\n", + " sardf2=get_sar_agg(sardf2)\n", + " #in case we compare two clusters\n", + " sardf2.columns=sardf.columns\n", + "\n", + " sarcolumns=sardf.columns\n", + " sarcmp=sardf.join(sardf2,lsuffix='_'+lrun,rsuffix='_'+rrun)\n", + " sarsum=sarcmp[[\"agg_\"+lrun,\"agg_\"+rrun]]\n", + "\n", + " sarsum.columns=[lrun,rrun]\n", + " \n", + " summary=pandas.concat([pdstime,sarsum])\n", + " if showemon:\n", + " summary=pandas.concat([summary,emonsum])\n", + " \n", + " summary[\"diff\"]=numpy.where(summary[rrun] > 0, summary[lrun]/summary[rrun]-1, 0)\n", + " \n", + " \n", + " def highlight_diff(x):\n", + " styles=[]\n", + " mx=x.max()\n", + " mn=x.min()\n", + " mx=max(mx,-mn,0.2)\n", + " for j in x.index:\n", + " m1=(x[j])/mx*100 if x[j]!=None else 0\n", + " if m1>0:\n", + " styles.append(f'width: 400px ; background-image: linear-gradient(to right, transparent 50%, #5fba7d 50%, #5fba7d {50+m1/2}%, transparent {50+m1/2}%)')\n", + " else:\n", + " styles.append(f'width: 400px ;background-image: linear-gradient(to left, transparent 50%, #f1a863 50%, #f1a863 {50-m1/2}%, transparent {50-m1/2}%)')\n", + " return styles\n", + "\n", + " output.append(summary.style.apply(highlight_diff,subset=['diff']).format({lrun:\"{:,.2f}\",rrun:\"{:,.2f}\",'diff':\"{:,.2%}\"}).render())\n", + "\n", + " cmp_plot=cmp\n", + " cmp_plot['diff']=cmp_plot['runtime_'+lrun]-cmp_plot['runtime_'+rrun]\n", + "\n", + " pltx=cmp_plot.sort_values(by='diff',axis=0).plot.bar(y=['runtime_'+lrun,'runtime_'+rrun],figsize=(30,8))\n", + " better_num=sqldf('''select count(*) from cmp_plot where diff>0''')['count(*)'][0]\n", + " 
pltx.text(0.1, 0.8,'{:d} queries are better'.format(better_num), ha='center', va='center', transform=pltx.transAxes)\n", + "\n", + " df1 = pandas.DataFrame('', index=cmp.index, columns=cmpcolumns)\n", + " for l in cmpcolumns:\n", + " for j in cmp.index:\n", + " df1[l][j]=[cmp[l+\"_\"+lrun][j],cmp[l+\"_\"+rrun][j],cmp[l+\"_\"+lrun][j]/cmp[l+\"_\"+rrun][j]-1]\n", + "\n", + " def highlight_greater(x,columns):\n", + " df1 = pandas.DataFrame('', index=x.index, columns=x.columns)\n", + " for l in columns:\n", + " m={}\n", + " for j in x.index:\n", + " m[j] = (x[l][j][1] / x[l][j][0])*100 if x[l][j][0]!=0 else 100\n", + " mx=max(m.values())-100\n", + " mn=100-min(m.values())\n", + " mx=max(mx,mn)\n", + " for j in x.index:\n", + " m1=-(100-m[j])/mx*100 if x[l][j][0]!=0 else 0\n", + " if m1>0:\n", + " df1[l][j] = f'background-image: linear-gradient(to right, transparent 50%, #5fba7d 50%, #5fba7d {50+m1/2}%, transparent {50+m1/2}%)'\n", + " else:\n", + " df1[l][j] = f'background-image: linear-gradient(to left, transparent 50%, #f1a863 50%, #f1a863 {50-m1/2}%, transparent {50-m1/2}%)'\n", + "\n", + " return df1\n", + "\n", + " def display_compare(df,columns):\n", + " output.append(df.style.set_properties(**{'width': '300px','border-style':'solid','border-width':'1px'}).apply(lambda x: highlight_greater(x,columns), axis=None).format(lambda x: '''\n", + "
{:,.2f}
\n", + "
{:,.2f}
\n", + "
{:,.2f}%
\n", + " '''.format(x[0],x[1],x[2]*100)).render())\n", + " display_compare(df1,cmpcolumns)\n", + "\n", + " df3 = pandas.DataFrame('', index=sarcmp.index, columns=sarcolumns)\n", + " for l in sarcolumns:\n", + " for j in df3.index:\n", + " df3[l][j]=[sarcmp[l+\"_\"+lrun][j],sarcmp[l+\"_\"+rrun][j],sarcmp[l+\"_\"+lrun][j]/sarcmp[l+\"_\"+rrun][j]-1]\n", + " display_compare(df3,sarcolumns)\n", + "\n", + " if showemon:\n", + " df2 = pandas.DataFrame('', index=emoncmp.index, columns=emoncolumns)\n", + " for l in emoncolumns:\n", + " for j in df2.index:\n", + " df2[l][j]=[emoncmp[l+\"_\"+lrun][j],emoncmp[l+\"_\"+rrun][j],emoncmp[l+\"_\"+lrun][j]/emoncmp[l+\"_\"+rrun][j]-1]\n", + " display_compare(df2,emoncolumns)\n", + "\n", + " print(\"time breakdown\")\n", + " ################################ time breakdown ##################################################################################################\n", + " timel=appals.show_time_metric(plot=False)\n", + " timer=appals2.show_time_metric(plot=False)\n", + " timer.columns=[l.replace(\"scan time\",\"time_batchscan\") for l in timer.columns]\n", + " timel.columns=[l.replace(\"scan time\",\"time_batchscan\") for l in timel.columns]\n", + " rcols=timer.columns\n", + " lcols=[]\n", + " for c in [l.split(\"%\")[1][1:] for l in rcols]:\n", + " for t in timel.columns:\n", + " if t.endswith(c):\n", + " lcols.append(t)\n", + " for t in timel.columns:\n", + " if t not in lcols:\n", + " lcols.append(t)\n", + " timel_adj=timel[lcols]\n", + "\n", + " fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True,figsize=(30,8),gridspec_kw = {'width_ratios':[1, 1]})\n", + " plt.subplots_adjust(wspace=0.01)\n", + " ax=timel_adj.plot.bar(ax=axs[0],stacked=True)\n", + " list_values=timel_adj.loc[0].values\n", + " for rect, value in zip(ax.patches, list_values):\n", + " h = rect.get_height() /2.\n", + " w = rect.get_width() /2.\n", + " x, y = rect.get_xy()\n", + " ax.text(x+w, 
y+h,\"{:,.2f}\".format(value),horizontalalignment='center',verticalalignment='center',color=\"white\")\n", + " ax=timer.plot.bar(ax=axs[1],stacked=True)\n", + " list_values=timer.loc[0].values\n", + " for rect, value in zip(ax.patches, list_values):\n", + " h = rect.get_height() /2.\n", + " w = rect.get_width() /2.\n", + " x, y = rect.get_xy()\n", + " ax.text(x+w, y+h,\"{:,.2f}\".format(value),horizontalalignment='center',verticalalignment='center',color=\"white\")\n", + "\n", + "################################ critical time breakdown ##################################################################################################\n", + " timel=appals.show_time_metric(plot=False,taskids=[l[0].item() for l in appals.criticaltasks])\n", + " timer=appals2.show_time_metric(plot=False,taskids=[l[0].item() for l in appals2.criticaltasks])\n", + " timer.columns=[l.replace(\"scan time\",\"time_batchscan\") for l in timer.columns]\n", + " timel.columns=[l.replace(\"scan time\",\"time_batchscan\") for l in timel.columns]\n", + " rcols=timer.columns\n", + " lcols=[]\n", + " for c in [l.split(\"%\")[1][1:] for l in rcols]:\n", + " for t in timel.columns:\n", + " if t.endswith(c):\n", + " lcols.append(t)\n", + " for t in timel.columns:\n", + " if t not in lcols:\n", + " lcols.append(t)\n", + " timel_adj=timel[lcols]\n", + "\n", + " fig, axs = plt.subplots(nrows=1, ncols=2, sharey=True,figsize=(30,8),gridspec_kw = {'width_ratios':[1, 1]})\n", + " plt.subplots_adjust(wspace=0.01)\n", + " ax=timel_adj.plot.bar(ax=axs[0],stacked=True)\n", + " list_values=timel_adj.loc[0].values\n", + " for rect, value in zip(ax.patches, list_values):\n", + " h = rect.get_height() /2.\n", + " w = rect.get_width() /2.\n", + " x, y = rect.get_xy()\n", + " ax.text(x+w, y+h,\"{:,.2f}\".format(value),horizontalalignment='center',verticalalignment='center',color=\"white\")\n", + " ax=timer.plot.bar(ax=axs[1],stacked=True)\n", + " list_values=timer.loc[0].values\n", + " for rect, value in zip(ax.patches, 
list_values):\n", + " h = rect.get_height() /2.\n", + " w = rect.get_width() /2.\n", + " x, y = rect.get_xy()\n", + " ax.text(x+w, y+h,\"{:,.2f}\".format(value),horizontalalignment='center',verticalalignment='center',color=\"white\")\n", + "\n", + "\n", + " ################################ hot stage ##########################################################################################################\n", + "\n", + " hotstagel=appals.get_hottest_stages(plot=False)\n", + " hotstager=appals2.get_hottest_stages(plot=False)\n", + " hotstagel.style.format(lambda x: '''{:,.2f}'''.format(x))\n", + "\n", + " norm = matplotlib.colors.Normalize(vmin=0, vmax=max(hotstager.queryid))\n", + " cmap = matplotlib.cm.get_cmap('brg')\n", + " def setbkcolor(x):\n", + " rgba=cmap(norm(x['queryid']))\n", + " return ['background-color:rgba({:d},{:d},{:d},1); color:white'.format(int(rgba[0]*255),int(rgba[1]*255),int(rgba[2]*255))]*9\n", + "\n", + " output.append(\"
\" + hotstagel.style.apply(setbkcolor,axis=1).format({\"total_time\":lambda x: '{:,.2f}'.format(x),\"stdev_time\":lambda x: '{:,.2f}'.format(x),\"acc_total\":lambda x: '{:,.2%}'.format(x),\"total\":lambda x: '{:,.2%}'.format(x)}).render()+\n", + " \"\" + hotstager.style.apply(setbkcolor,axis=1).format({\"total_time\":lambda x: '{:,.2f}'.format(x),\"stdev_time\":lambda x: '{:,.2f}'.format(x),\"acc_total\":lambda x: '{:,.2%}'.format(x),\"total\":lambda x: '{:,.2%}'.format(x)}).render()+ \"
\")\n", + "\n", + " if not show_queryplan_diff:\n", + " return \"\\n\".join(output)\n", + " \n", + " print(\"hot stage\")\n", + "\n", + " loperators=appals.getOperatorCount()\n", + " roperators=appals2.getOperatorCount()\n", + " loperators_rowcnt=appals.get_metric_output_rowcnt()\n", + " roperators_rowcnt=appals2.get_metric_output_rowcnt()\n", + " \n", + " def show_query_diff(queryid, always_show=True):\n", + " lops=pandas.DataFrame(loperators[queryid])\n", + " lops.columns=['calls_l']\n", + " lops=lops.loc[lops['calls_l'] >0]\n", + "\n", + " rops=pandas.DataFrame(roperators[queryid])\n", + " rops.columns=[\"calls_r\"]\n", + " rops=rops.loc[rops['calls_r'] >0]\n", + " lops_row=pandas.DataFrame(loperators_rowcnt[queryid])\n", + " lops_row.columns=[\"rows_l\"]\n", + " lops_row=lops_row.loc[lops_row['rows_l'] >0]\n", + "\n", + " rops_row=pandas.DataFrame(roperators_rowcnt[queryid])\n", + " rops_row.columns=[\"rows_r\"]\n", + " rops_row=rops_row.loc[rops_row['rows_r'] >0]\n", + "\n", + " opscmp=pandas.merge(pandas.merge(pandas.merge(lops,rops,how=\"outer\",left_index=True,right_index=True),lops_row,how=\"outer\",left_index=True,right_index=True),rops_row,how=\"outer\",left_index=True,right_index=True)\n", + " opscmp=opscmp.fillna(\"\")\n", + " \n", + " def set_bk_color_opscmp(x):\n", + " calls_l= 0 if x['calls_l']==\"\" else x['calls_l']\n", + " calls_r= 0 if x['calls_r']==\"\" else x['calls_r']\n", + " rows_l= 0 if x['rows_l']==\"\" else x['rows_l']\n", + " rows_r= 0 if x['rows_r']==\"\" else x['rows_r']\n", + "\n", + " if calls_l > calls_r or rows_l > rows_r:\n", + " return ['background-color:#eb6b34']*4\n", + " if calls_l < calls_r or rows_l < rows_r:\n", + " return ['background-color:#8ad158']*4\n", + " return ['color:#dbd4d0']*4\n", + "\n", + " if always_show or not (opscmp[\"rows_l\"].equals(opscmp[\"rows_r\"]) and opscmp[\"calls_l\"].equals(opscmp[\"calls_r\"])):\n", + " print(f\"query {queryid} queryplan diff \")\n", + " if not always_show:\n", + " 
output.append(f\"

query{queryid} is different

\")\n", + " output.append(opscmp.style.apply(set_bk_color_opscmp,axis=1).render())\n", + "\n", + " planl=appals.get_query_plan(queryid=queryid,show_plan_only=True,plot=False)\n", + " planr=appals2.get_query_plan(queryid=queryid,show_plan_only=True,plot=False)\n", + " output.append(\"
\"+planl+\"\"+planr+\"
\")\n", + "\n", + " outputx=df1['output rows']\n", + " runtimex = df1['runtime']\n", + " for x in outputx.index:\n", + " if runtimex[x][0]/runtimex[x][1]<0.95 or runtimex[x][0]/runtimex[x][1]>1.05:\n", + " output.append(f\"

query{x} is different,{lrun} time: {df1['runtime'][x][0]}, {rrun} time: {df1['runtime'][x][1]}

\")\n", + " if queryids is not None and x not in queryids:\n", + " print(\"query plan skipped\")\n", + " continue\n", + " try:\n", + " show_query_diff(x, True)\n", + " except:\n", + " print(\" query diff error\")\n", + " else:\n", + " try:\n", + " show_query_diff(x, False)\n", + " except:\n", + " print(\" query diff error\")\n", + " \n", + " return \"\\n\".join(output)\n", + " \n", + "\n", + " \n", + " def show_queryplan_diff(app2, queryid,**kwargs):\n", + " lbasedir=kwargs.get(\"basedir\",app2.basedir)\n", + " r_appid=kwargs.get(\"r_appid\",app2.appid)\n", + " \n", + " app=kwargs.get(\"rapp\",Application_Run(r_appid,basedir=lbasedir))\n", + "\n", + " appals=app.analysis[\"app\"][\"als\"]\n", + " appals2=app2.analysis[\"app\"][\"als\"]\n", + "\n", + " hotstagel=appals.get_hottest_stages(plot=False)\n", + " hotstager=appals2.get_hottest_stages(plot=False)\n", + " hotstagel.style.format(lambda x: '''{:,.2f}'''.format(x))\n", + "\n", + " loperators=appals.getOperatorCount()\n", + " roperators=appals2.getOperatorCount()\n", + " loperators_rowcnt=appals.get_metric_output_rowcnt()\n", + " roperators_rowcnt=appals2.get_metric_output_rowcnt()\n", + "\n", + " lrun=app.appid\n", + " rrun=app2.appid\n", + "\n", + " output=[]\n", + "\n", + " def show_query_diff(queryid):\n", + " lops=pandas.DataFrame(loperators[queryid])\n", + " lops.columns=['calls_l']\n", + " lops=lops.loc[lops['calls_l'] >0]\n", + "\n", + " rops=pandas.DataFrame(roperators[queryid])\n", + " rops.columns=[\"calls_r\"]\n", + " rops=rops.loc[rops['calls_r'] >0]\n", + " lops_row=pandas.DataFrame(loperators_rowcnt[queryid])\n", + " lops_row.columns=[\"rows_l\"]\n", + " lops_row=lops_row.loc[lops_row['rows_l'] >0]\n", + "\n", + " rops_row=pandas.DataFrame(roperators_rowcnt[queryid])\n", + " rops_row.columns=[\"rows_r\"]\n", + " rops_row=rops_row.loc[rops_row['rows_r'] >0]\n", + "\n", + " 
opscmp=pandas.merge(pandas.merge(pandas.merge(lops,rops,how=\"outer\",left_index=True,right_index=True),lops_row,how=\"outer\",left_index=True,right_index=True),rops_row,how=\"outer\",left_index=True,right_index=True)\n", + " opscmp=opscmp.fillna(\"\")\n", + "\n", + " def set_bk_color_opscmp(x):\n", + " calls_l= 0 if x['calls_l']==\"\" else x['calls_l']\n", + " calls_r= 0 if x['calls_r']==\"\" else x['calls_r']\n", + " rows_l= 0 if x['rows_l']==\"\" else x['rows_l']\n", + " rows_r= 0 if x['rows_r']==\"\" else x['rows_r']\n", + "\n", + " if calls_l > calls_r or rows_l > rows_r:\n", + " return ['background-color:#eb6b34']*4\n", + " if calls_l < calls_r or rows_l < rows_r:\n", + " return ['background-color:#8ad158']*4\n", + " return ['color:#dbd4d0']*4\n", + "\n", + " output.append(opscmp.style.apply(set_bk_color_opscmp,axis=1).render())\n", + "\n", + " planl=appals.get_query_plan(queryid=queryid,show_plan_only=True,plot=False)\n", + " planr=appals2.get_query_plan(queryid=queryid,show_plan_only=True,plot=False)\n", + " output.append(\"
\"+planl+\"\"+planr+\"
\")\n", + "\n", + " x=queryid\n", + " print(\"query \",x,\" queryplan diff \")\n", + " #output.append(f\"

query{x} is different,{lrun} time: {df1['runtime'][x][0]}, {rrun} time: {df1['runtime'][x][1]}

\")\n", + " show_query_diff(x)\n", + " display(HTML(\"\\n\".join(output)))\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# MISC" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "def reduce_metric(pdrst,slave_id,metric,core,agg_func):\n", + " pdrst['rst']=pdrst.apply(lambda x:x['app_id'].get_reduce_metric(slave_id,metric,core,agg_func), axis=1)\n", + " for l in agg_func:\n", + " pdrst[get_alias_name(metric,l)]=pdrst.apply(lambda x:x['rst'].iloc[0][get_alias_name(metric,l)],axis=1)\n", + " return pdrst.drop(columns=['rst'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def cvt_number(n):\n", + " try:\n", + " if str(n).isdigit():\n", + " return f'{n:,}'\n", + " else:\n", + " return f'{round(float(n),2):,}'\n", + " except ValueError:\n", + " return n\n", + "\n", + "def parse_changelog(changelog):\n", + " out=[]\n", + " if fs.exists(changelog):\n", + " with fs.open(changelog) as f:\n", + " for l in f.readlines():\n", + " l = l.decode('ascii')\n", + " if l.startswith(\"commit\"):\n", + " out.append(re.sub(r\"commit +(.+)\",r\"commit \\1\",l))\n", + " elif l.startswith(\"Author\"):\n", + " out.append(re.sub(r\"Author: +([^<]+) <(.+)>\",r\"Author: \\1 <\\2> \",l))\n", + " elif l.startswith(\"Date\"):\n", + " out.append(re.sub(r\"Date: +(\\d\\d\\d\\d-\\d\\d-\\d\\d)\",r\"Date: \\1\",l))\n", + " else:\n", + " out.append(l)\n", + " else:\n", + " out.append(f'{os.path.basename(changelog)} not found!')\n", + " return out\n", + "\n", + "def generate_query_diff(name, comp_name, query_time_file, comp_query_time_file):\n", + " result = []\n", + " if fs.exists(query_time_file) and fs.exists(comp_query_time_file):\n", + " 
result.append(['query', name, comp_name, 'difference', 'percentage'])\n", + " \n", + " qtimes = {}\n", + " comp_qtimes = {}\n", + " with fs.open(query_time_file) as f:\n", + " qtimes = json.loads(f.read().decode('ascii'))\n", + " with fs.open(comp_query_time_file) as f:\n", + " comp_qtimes = json.loads(f.read().decode('ascii'))\n", + " \n", + " query_ids = sorted(qtimes.keys(), key=lambda x: str(len(x))+x if x[-1] != 'a' and x[-1] != 'b' else str(len(x)-1) + x)\n", + " \n", + " if len(comp_qtimes) != len(qtimes):\n", + " raise Exception('Number of queries mismatch!')\n", + " \n", + " query_ids.append('total')\n", + " qtimes['total'] = sum([float(i) for i in qtimes.values()])\n", + " comp_qtimes['total'] = sum([float(i) for i in comp_qtimes.values()])\n", + " \n", + " for q in query_ids:\n", + " t1 = qtimes.get(q)\n", + " t2 = comp_qtimes.get(q)\n", + " delta = str(\"{:.2f}\".format(float(t2) - float(t1)))\n", + " perc = str(\"{:.2f}\".format((float(t2) / float(t1)) * 100)) + '%'\n", + " result.append([q, str(t1), str(t2), delta, perc])\n", + " return result\n", + "\n", + "def append_summary(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, statsall, output):\n", + " with open(output,\"a\") as linkfile:\n", + "\n", + " difftable=''' \n", + " '''\n", + " for k,v in statsall.items():\n", + " difftable+=f'''\n", + " \n", + " \n", + " \n", + " '''\n", + " difftable+='''\n", + " \n", + "
{k}{cvt_number(v)}
\\n'''\n", + " linkfile.write(difftable)\n", + " linkfile.write(\"\\n

\\n\")\n", + " \n", + " linkfile.write(\"\\n gluten gitlog in last 2 days
\\n\")\n", + " out=parse_changelog(os.path.join('/', base_dir, appid, 'changelog_gluten'))\n", + " linkfile.write(\"
\".join(out))\n", + " linkfile.write(\"\\n

\\n\")\n", + " \n", + " linkfile.write(\"\\n velox gitlog in last 2 days
\\n\")\n", + " out=parse_changelog(os.path.join('/', base_dir, appid, 'changelog_velox'))\n", + " linkfile.write(\"
\".join(out))\n", + " linkfile.write(\"\\n

\\n\")\n", + " \n", + " linkfile.write('''
\\n''')\n", + " \n", + " def append_query_diff(their_appid, their_base_dir, their_name):\n", + " query_diff=generate_query_diff(name, their_name, os.path.join('/', base_dir, appid, 'query_time.json'), os.path.join('/', their_base_dir, their_appid, 'query_time.json'))\n", + " if query_diff:\n", + " difftable='''\n", + " \n", + " '''\n", + " for l in query_diff:\n", + " difftable+='''\n", + " '''\n", + " base=0\n", + " pr=0\n", + " if re.match(r\"[0-9.]+\",l[1]):\n", + " base=float(l[1])\n", + " l[1]=\"{:.2f}\".format(base)\n", + " if re.match(r\"[0-9.]+\",l[2]):\n", + " pr=float(l[2])\n", + " l[2]=\"{:.2f}\".format(pr)\n", + "\n", + " for d in l:\n", + " color='#000000'\n", + " if base > pr:\n", + " color='#6F9915'\n", + " elif base < pr:\n", + " color='#F92663'\n", + " difftable += f'''\n", + " '''\n", + "\n", + " difftable+='''\n", + " '''\n", + "\n", + " difftable+='''\n", + " \n", + "
{d}
'''\n", + " linkfile.write(difftable)\n", + " linkfile.write(\"\\n

\\n\")\n", + " # return percentage\n", + " return query_diff[-1][-1]\n", + " return ''\n", + "\n", + " baseline_perc = ''\n", + " if comp_appid:\n", + " append_query_diff(comp_appid, comp_base_dir, comp_name)\n", + " if baseline_appid:\n", + " baseline_perc = append_query_diff(baseline_appid, baseline_base_dir, 'Vanilla Spark')\n", + "\n", + " linkfile.write(\"
\")\n", + " \n", + " return baseline_perc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def generate_email_body_title(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, notebook, notebook_html, traceview, stats, summary, pr=''):\n", + " statsall=collections.OrderedDict()\n", + " for k,v in stats.items():\n", + " statsall[k]=v\n", + " for k,v in summary.to_dict()[appals.appid].items():\n", + " statsall[k]=v\n", + " \n", + " pr_link=''\n", + " if pr:\n", + " pr_link=f'https://github.com/apache/incubator-gluten/pull/{pr}'\n", + " title=!wget --quiet -O - $pr_link | sed -n -e 's!.*\\(.*\\).*!\\1!p'\n", + " pr_link=f'pr link: {title[0]}
'\n", + " \n", + " output=f'/tmp/{appid}.html'\n", + " with open(output, 'w+') as f:\n", + " f.writelines(f'''\n", + "\n", + "history event: http://{local_ip}:18080/tmp/sparkEventLog/{appid}/jobs/
\n", + "notebook: http://{local_ip}:8889/notebooks/{base_dir}/{notebook}
\n", + "notebook html: http://{local_ip}:8889/view/{base_dir}/{notebook_html}
\n", + "traceview: {traceview}
\n", + "{pr_link}\n", + "

''')\n", + " baseline_perc = append_summary(appid, base_dir, name, comp_appid, comp_base_dir, comp_name, baseline_appid, baseline_base_dir, statsall, output)\n", + " \n", + " title_prefix = f\"[ {datetime.now().strftime('%m_%d_%Y')} ]\" if not pr else f\"[ PR {pr} ]\"\n", + " title = f'{title_prefix} {name} {appid} {baseline_perc}'\n", + " return output,title" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true + }, + "source": [ + "# TPCDS query map" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "code_folding": [], + "hidden": true + }, + "outputs": [], + "source": [ + "m='''1\tq01\n", + " 2\tq02\n", + " 3\tq03\n", + " 4\tq04\n", + " 5\tq05\n", + " 6\tq06\n", + " 7\tq07\n", + " 8\tq08\n", + " 9\tq09\n", + " 10\tq10\n", + " 11\tq11\n", + " 12\tq12\n", + " 13\tq13\n", + " 14\tq14a\n", + " 15\tq14b\n", + " 16\tq15\n", + " 17\tq16\n", + " 18\tq17\n", + " 19\tq18\n", + " 20\tq19\n", + " 21\tq20\n", + " 22\tq21\n", + " 23\tq22\n", + " 24\tq23a\n", + " 25\tq23b\n", + " 26\tq24a\n", + " 27\tq24b\n", + " 28\tq25\n", + " 29\tq26\n", + " 30\tq27\n", + " 31\tq28\n", + " 32\tq29\n", + " 33\tq30\n", + " 34\tq31\n", + " 35\tq32\n", + " 36\tq33\n", + " 37\tq34\n", + " 38\tq35\n", + " 39\tq36\n", + " 40\tq37\n", + " 41\tq38\n", + " 42\tq39a\n", + " 43\tq39b\n", + " 44\tq40\n", + " 45\tq41\n", + " 46\tq42\n", + " 47\tq43\n", + " 48\tq44\n", + " 49\tq45\n", + " 50\tq46\n", + " 51\tq47\n", + " 52\tq48\n", + " 53\tq49\n", + " 54\tq50\n", + " 55\tq51\n", + " 56\tq52\n", + " 57\tq53\n", + " 58\tq54\n", + " 59\tq55\n", + " 60\tq56\n", + " 61\tq57\n", + " 62\tq58\n", + " 63\tq59\n", + " 64\tq60\n", + " 65\tq61\n", + " 66\tq62\n", + " 67\tq63\n", + " 68\tq64\n", + " 69\tq65\n", + " 70\tq66\n", + " 71\tq67\n", + " 72\tq68\n", + " 73\tq69\n", + " 74\tq70\n", + " 75\tq71\n", + " 76\tq72\n", + " 77\tq73\n", + " 78\tq74\n", + " 79\tq75\n", + " 80\tq76\n", + " 81\tq77\n", + " 82\tq78\n", + " 83\tq79\n", + " 84\tq80\n", + " 
85\tq81\n", + " 86\tq82\n", + " 87\tq83\n", + " 88\tq84\n", + " 89\tq85\n", + " 90\tq86\n", + " 91\tq87\n", + " 92\tq88\n", + " 93\tq89\n", + " 94\tq90\n", + " 95\tq91\n", + " 96\tq92\n", + " 97\tq93\n", + " 98\tq94\n", + " 99\tq95\n", + " 100\tq96\n", + " 101\tq97\n", + " 102\tq98\n", + " 103\tq99'''.split(\"\\n\")\n", + "tpcds_query_map=[l.strip().split(\"\\t\") for l in m]\n", + "tpcds_query_map={int(l[0]):l[1] for l in tpcds_query_map}" + ] + } + ], + "metadata": { + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "nbTranslate": { + "displayLangs": [ + "*" + ], + "hotkey": "alt-t", + "langInMainMenu": true, + "sourceLang": "en", + "targetLang": "fr", + "useGoogleTranslate": true + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "298.275px", + "left": "1180px", + "top": "317.125px", + "width": "332px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + 
"nbformat_minor": 4 +} diff --git a/tools/workload/benchmark_velox/build_gluten.sh b/tools/workload/benchmark_velox/build_gluten.sh new file mode 100755 index 000000000000..95872b9ed8b3 --- /dev/null +++ b/tools/workload/benchmark_velox/build_gluten.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +BASEDIR=$(dirname $0) +echo "Script called with: $0" +echo "BASEDIR resolved to: $BASEDIR" + +GLUTEN_HOME=$(realpath $BASEDIR/../../..) +echo "Located Gluten in: ${GLUTEN_HOME}" + +sudo rm -rf ${GLUTEN_HOME}/ep/build-velox/build/velox_ep/ || true + +spark_version=$(head -n1 $SPARK_HOME/RELEASE | awk '{print $2}') +short_version=${spark_version%.*} + +sed -i "s/3.2 3.3 3.4 3.5/$short_version/" $GLUTEN_HOME/dev/buildbundle-veloxbe.sh + +# Update local docker image to make more cache hit for vcpkg lib binary. 
+sudo docker pull apache/gluten:vcpkg-centos-7 + +sudo docker run --rm \ + -v ${GLUTEN_HOME}:/root/gluten \ + -v ${HOME}/.cache/vcpkg:/root/.cache/vcpkg \ + -v ${HOME}/.m2:/root/.m2 \ + -v ${HOME}/.ccache:/root/.ccache \ + -e http_proxy \ + -e https_proxy \ + --workdir /root/gluten \ + apache/gluten:vcpkg-centos-7 \ + ./dev/package-vcpkg.sh + diff --git a/tools/workload/benchmark_velox/emon.list b/tools/workload/benchmark_velox/emon.list new file mode 100644 index 000000000000..552bfefca0f1 --- /dev/null +++ b/tools/workload/benchmark_velox/emon.list @@ -0,0 +1,10 @@ +-q -c -experimental -t0.5 -l100000 -u +-C ( + +INST_RETIRED.ANY +CPU_CLK_UNHALTED.REF_TSC +CPU_CLK_UNHALTED.THREAD +MSR_EVENT:msr=0x611:type=FREERUN:scope=PACKAGE + +) + diff --git a/tools/workload/benchmark_velox/initialize.ipynb b/tools/workload/benchmark_velox/initialize.ipynb index cbbc27686951..30574f8c16eb 100644 --- a/tools/workload/benchmark_velox/initialize.ipynb +++ b/tools/workload/benchmark_velox/initialize.ipynb @@ -2,34 +2,28 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "heading_collapsed": true - }, + "metadata": {}, "source": [ "# System Setup" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "**1. Install system dependencies and python packages. Prepare the environment.**" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "First, install all dependencies and python packages as `root`. 
Run commands and make sure the installations are successful.\n", "\n", "```bash\n", "apt update\n", "\n", - "apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison flex\n", + "apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison flex linux-tools-common linux-tools-generic linux-tools-`uname -r` mailutils\n", "\n", "python3 -m pip install notebook==6.5.2\n", "python3 -m pip install jupyter_server==1.23.4\n", @@ -45,9 +39,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "***Required for Ubuntu***\n", "\n", @@ -73,18 +65,14 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "**2. Format and mount disks**" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Create a python virtual environment to finish the system setup process:\n", "\n", @@ -101,18 +89,14 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Run script [init_disks.py](./init_disks.py) to format and mount disks. **Be careful when choosing the disks to format.** If you see errors like `device or resource busy`, perhaps the partition has been mounted, you should unmount it first. If you still see this error, reboot the system and try again." 
] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Exit `venv`:\n", "```bash\n", @@ -122,18 +106,14 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "**3. Create user `sparkuser`**" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Create user `sparkuser` without password and with sudo priviledge. It's recommended to use one of the disks as the home directory instead of the system drive.\n", "\n", @@ -151,9 +131,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Generate ssh keys for `sparkuser`\n", "\n", @@ -172,9 +150,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Generate ssh keys for `root`, and enable no password ssh from `sparkuser`\n", "\n", @@ -188,9 +164,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Login to `sparkuser` and run the first-time ssh to the `root`\n", "\n", @@ -207,9 +181,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "***Required for Ubuntu***\n", "\n", @@ -222,18 +194,14 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "**4. Configure jupyter notebook**" ] }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "As `sparkuser`, install python packages\n", "\n", @@ -252,9 +220,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Configure jupyter notebook. 
Setup password when it prompts\n", "\n", @@ -341,9 +307,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Clone Gluten\n", "\n", @@ -355,9 +319,7 @@ }, { "cell_type": "markdown", - "metadata": { - "hidden": true - }, + "metadata": {}, "source": [ "Start jupyter notebook\n", "\n", @@ -546,7 +508,7 @@ "source": [ "for l in clients:\n", " !ssh root@{l} apt update > /dev/null 2>&1\n", - " !ssh root@{l} apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison flex > /dev/null 2>&1" + " !ssh root@{l} apt install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev openjdk-8-jdk maven vim pip sysstat gcc-9 libjemalloc-dev nvme-cli curl zip unzip bison flex linux-tools-common linux-tools-generic linux-tools-`uname -r` > /dev/null 2>&1" ] }, { @@ -1713,7 +1675,26 @@ "heading_collapsed": true }, "source": [ - "# Configure startup" + "# Configure monitor & startups" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "!cd ~\n", + "!git clone https://github.com/trailofbits/tsc_freq_khz.git\n", + "\n", + "for l in clients:\n", + " !scp -r tsc_freq_khz {l}:~/\n", + "\n", + "for l in hclients:\n", + " !ssh {l} 'cd tsc_freq_khz && make && sudo insmod ./tsc_freq_khz.ko' >/dev/null 2>&1\n", + " !ssh root@{l} 'dmesg | grep tsc_freq_khz'" ] }, { @@ -1732,6 +1713,10 @@ "end=$(($(nproc) - 1))\n", "for i in $(seq 0 $end); do echo performance > /sys/devices/system/cpu/cpu$i/cpufreq/scaling_governor; done\n", "for file in $(find /sys/devices/system/cpu/cpu*/power/energy_perf_bias); do 
echo \"0\" > $file; done\n", + "\n", + "if [ -d /home/{user}/sep_installed ]; then\n", + " /home/{user}/sep_installed/sepdk/src/insmod-sep -g {user}\n", + "fi\n", "'''\n", "\n", "with open('/tmp/tmpstartup', 'w') as f:\n", @@ -1761,6 +1746,124 @@ " !ssh $l \"sudo systemctl status mystartup.service\"" ] }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "## Install Emon" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + " Get the latest offline installer from [link](https://www.intel.com/content/www/us/en/developer/tools/oneapi/vtune-profiler-download.html?operatingsystem=linux&linux-install-type=offline) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "offline_installer = 'https://registrationcenter-download.intel.com/akdlm/IRC_NAS/e7797b12-ce87-4df0-aa09-df4a272fc5d9/intel-vtune-2025.0.0.1130_offline.sh'\n", + "for l in hclients:\n", + " !ssh {l} \"wget {offline_installer} -q && chmod +x intel-vtune-2025.0.0.1130_offline.sh\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} \"sudo ./intel-vtune-2025.0.0.1130_offline.sh -a -c -s --eula accept\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} \"sudo chown -R {user}:{user} /opt/intel/oneapi/vtune/ && rm -f sep_installed && ln -s /opt/intel/oneapi/vtune/latest sep_installed\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} \"cd sep_installed/sepdk/src/; echo -e \\\"\\\\n\\\\n\\\\n\\\" | ./build-driver\"" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "hidden": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh root@{l} \"/home/{user}/sep_installed/sepdk/src/rmmod-sep && /home/{user}/sep_installed/sepdk/src/insmod-sep -g {user}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} \"source /home/{user}/sep_installed/sep_vars.sh > /dev/null 2>&1; emon -v | head -n 1\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in hclients:\n", + " !ssh {l} 'echo \"source /home/{user}/sep_installed/sep_vars.sh > /dev/null 2>&1\" >> ~/.bashrc'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for c in hclients:\n", + " !ssh {c} 'tail -n1 ~/.bashrc'" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1966,6 +2069,15 @@ "!sudo -E apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin >/dev/null 2>&1" ] }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Configure docker proxy" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1987,7 +2099,32 @@ "{}\n", "'''.format(f'Environment=\"HTTP_PROXY={http_proxy}\"' if http_proxy else '', f'Environment=\"HTTPS_PROXY={https_proxy}\"' if https_proxy else '')\n", " f.writelines(s)\n", - " !sudo cp /tmp/http-proxy.conf /etc/systemd/system/docker.service.d" + " !sudo cp /tmp/http-proxy.conf /etc/systemd/system/docker.service.d\n", + " \n", + " !ssh root@localhost \"mkdir -p /root/.docker\"\n", + " with open(f'/tmp/config.json', 'w') as f:\n", + " s = f'''\n", + "{{\n", + " \"proxies\": {{\n", + " \"default\": {{\n", + " \"httpProxy\": \"{http_proxy}\",\n", + " \"httpsProxy\": \"{https_proxy}\",\n", + " \"noProxy\": 
\"127.0.0.0/8\"\n", + " }}\n", + " }}\n", + "}}\n", + " '''\n", + " f.writelines(s)\n", + " !ssh root@localhost \"cp -f /tmp/config.json /root/.docker\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Configure maven proxy" ] }, { @@ -2079,46 +2216,7 @@ }, "outputs": [], "source": [ - "!sudo docker pull apache/gluten:vcpkg-centos-7" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "import os\n", - "http_proxy=os.getenv('http_proxy')\n", - "https_proxy=os.getenv('https_proxy')\n", - "\n", - "container=!sudo docker run -e http_proxy={http_proxy} -e https_proxy={https_proxy} -itd apache/gluten:vcpkg-centos-7\n", - "containerid = container[0]\n", - "containerid" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "!sudo docker exec {containerid} bash -c \"cd /opt && git clone https://github.com/apache/incubator-gluten.git gluten\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "hidden": true - }, - "outputs": [], - "source": [ - "!sudo docker exec {containerid} bash -c \"cd /opt && source /opt/rh/devtoolset-9/enable && cd gluten && ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --enable_hdfs=ON > build.log\"" + "%cd ~" ] }, { @@ -2130,9 +2228,8 @@ "outputs": [], "source": [ "import os\n", - "if os.path.exists(f'/home/{user}/.m2/settings.xml'):\n", - " !sudo docker exec {containerid} bash -c \"mkdir -p ~/.m2\"\n", - " !sudo docker cp /home/{user}/.m2/settings.xml {containerid}:/root/.m2/settings.xml" + "if not os.path.exists('gluten'):\n", + " !git clone https://github.com/apache/incubator-gluten.git gluten" ] }, { @@ -2143,7 +2240,7 @@ }, "outputs": [], "source": [ - "!sudo docker exec {containerid} bash -c \"cd /opt/gluten && mvn clean package -DskipTests -Pspark-3.3 -Pbackends-velox\"" + "%cd 
~/gluten/tools/workload/benchmark_velox" ] }, { @@ -2154,7 +2251,7 @@ }, "outputs": [], "source": [ - "!sudo docker cp {containerid}:/opt/gluten/package/target/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar ~/" + "!bash build_gluten.sh" ] }, { @@ -2165,8 +2262,8 @@ }, "outputs": [], "source": [ - "for l in clients:\n", - " !scp ~/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-SNAPSHOT.jar {l}:~/" + "for l in hclients:\n", + " !scp ~/gluten/package/target/gluten-velox-bundle-spark*.jar {l}:~/" ] }, { @@ -2761,16 +2858,35 @@ "heading_collapsed": true }, "source": [ - "# Install Trace-Viewer (optional)" + "# Set up perf analysis tools (optional)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "We have a set of perf analysis scripts under $GLUTEN_HOME/tools/workload/benchmark_velox/analysis. You can follow below steps to deploy the scripts on the same cluster and use them for performance analysis after each run." ] }, { "cell_type": "markdown", "metadata": { + "heading_collapsed": true, "hidden": true }, "source": [ - "Clone the master branch\n", + "## Install and deploy Trace-Viewer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Clone the master branch of project catapult:\n", "```\n", "cd ~\n", "git clone https://github.com/catapult-project/catapult.git -b master\n", @@ -2783,11 +2899,11 @@ "hidden": true }, "source": [ - "Trace-Viewer requires python version 2.7. Create a virtualenv for python2.7\n", + "Trace-Viewer requires python version 2.7. 
Create a virtualenv for python2.7:\n", "```\n", "sudo apt install -y python2.7\n", - "virtualenv -p /usr/bin/python2.7 py27\n", - "source py27/bin/activate\n", + "virtualenv -p /usr/bin/python2.7 py27-env\n", + "source py27-env/bin/activate\n", "```" ] }, @@ -2797,7 +2913,7 @@ "hidden": true }, "source": [ - "Apply patch\n", + "Apply patch:\n", "\n", "```\n", "cd catapult\n", @@ -2832,13 +2948,99 @@ "hidden": true }, "source": [ - "Start the service\n", + "Start the service:\n", "\n", "```\n", "mkdir -p ~/trace_result\n", "cd ~/catapult && nohup ./bin/run_dev_server --no-install-hooks -d ~/trace_result -p1088 &\n", "```" ] + }, + { + "cell_type": "markdown", + "metadata": { + "heading_collapsed": true, + "hidden": true + }, + "source": [ + "## Deploy perf analysis scripts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Create a virtualenv to run the perf analysis scripts:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "\n", + "```bash\n", + "cd ~\n", + "virtualenv -p python3 -v paus-env\n", + "source paus-env/bin/activate\n", + "python3 -m pip install -r ~/gluten/tools/workload/benchmark_velox/analysis/requirements.txt\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "\n", + "We will put all perf analysis notebooks under `$HOME/PAUS`. 
Create the directory and start the notebook:\n", + "\n", + "```bash\n", + "mkdir -p ~/PAUS\n", + "cd ~/PAUS\n", + "nohup jupyter notebook --ip=0.0.0.0 --port=8889 &\n", + "```\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Package the virtual environment so that it can be distributed to other nodes:\n", + "```bash\n", + "cd ~\n", + "tar -czf paus-env.tar.gz paus-env\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "hidden": true + }, + "source": [ + "Distribute to the worker nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "hidden": true + }, + "outputs": [], + "source": [ + "for l in clients:\n", + " !scp ~/paus-env.tar.gz {l}:~/\n", + " !ssh {l} tar -zxf paus-env.tar.gz" + ] } ], "metadata": { @@ -2858,7 +3060,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" }, "nbTranslate": { "displayLangs": [ diff --git a/tools/workload/benchmark_velox/native_sql_initialize.ipynb b/tools/workload/benchmark_velox/native_sql_initialize.ipynb index ee6bf443f6b4..0772232d70c9 100644 --- a/tools/workload/benchmark_velox/native_sql_initialize.ipynb +++ b/tools/workload/benchmark_velox/native_sql_initialize.ipynb @@ -69,6 +69,20 @@ "# System Settings" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from pathlib import Path\n", + "home = os.path.realpath(str(Path.home()))\n", + "cwd = os.getcwd()\n", + "print(f'home: {home}')\n", + "print(f'cwd: {cwd}')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -237,7 +251,7 @@ "import spylon_kernel\n", "from collections import namedtuple\n", "from concurrent.futures import ThreadPoolExecutor\n", - "from datetime import date\n", + "from datetime import date, datetime\n", "from functools import reduce\n", "from IPython.display import display, HTML\n", "from 
matplotlib import rcParams\n", @@ -273,24 +287,25 @@ "import socket\n", "import os\n", "import sys\n", - "\n", - "from pathlib import Path\n", - "home = str(Path.home())\n", + "import json\n", "\n", "def upload_profile(server, base_dir, appid):\n", " local_profile_dir = os.path.join(home, 'profile')\n", " !mkdir -p {local_profile_dir}\n", - " !cd {local_profile_dir}; rm -f {appid}.tar.gz; tar zcvf {appid}.tar.gz {appid} >/dev/null 2>&1\n", + " !(cd {local_profile_dir}; rm -f {appid}.tar.gz; tar zcvf {appid}.tar.gz {appid}) >/dev/null 2>&1\n", " \n", " server_local_dir=os.path.join('PAUS', base_dir)\n", " server_local_profile_dir=os.path.join(server_local_dir, 'profile')\n", " server_hdfs_dir=f'/{base_dir}/'\n", "\n", " !ssh {server} \"mkdir -p {server_local_profile_dir}\"\n", - " !ssh {server} \"cd {server_local_profile_dir}; rm {appid}.tar.gz; rm -r {appid} >/dev/null 2>&1\"\n", + " !ssh {server} \"cd {server_local_profile_dir} && rm {appid}.tar.gz >/dev/null 2>&1\"\n", + " !ssh {server} \"cd {server_local_profile_dir} && rm -r {appid} >/dev/null 2>&1\"\n", " !scp {local_profile_dir}/{appid}.tar.gz {server}:{server_local_profile_dir}/\n", " !ssh {server} \"cd {server_local_profile_dir} && tar zxf {appid}.tar.gz\"\n", - " !ssh {server} \"hdfs dfs -mkdir -p {server_hdfs_dir}; hdfs dfs -rm -r {server_hdfs_dir}{appid}; hdfs dfs -put {server_local_profile_dir}/{appid} {server_hdfs_dir}\"\n", + " !ssh {server} \"hdfs dfs -mkdir -p {server_hdfs_dir}\"\n", + " !ssh {server} \"hdfs dfs -rm -r {server_hdfs_dir}{appid} >/dev/null 2>&1\"\n", + " !ssh {server} \"hdfs dfs -put {server_local_profile_dir}/{appid} {server_hdfs_dir}\"\n", " !ssh {server} \"cd {server_local_profile_dir}; rm {appid}.tar.gz; rm -r {appid}\"\n", "\n", "def killsar(clients):\n", @@ -306,6 +321,8 @@ " out=!ssh $l \"ps aux | grep -w perf | grep -v grep | tr -s ' ' | cut -d' ' -f2\"\n", " for x in out:\n", " !ssh root@$l \"kill $x > /dev/null 2>&1\"\n", + " for l in clients:\n", + " !ssh $l \"emon -stop > 
/dev/null 2>&1\"\n", "\n", "def killnumactl(clients):\n", " for l in clients:\n", @@ -313,7 +330,7 @@ " for x in out:\n", " !ssh $l \"kill $x > /dev/null 2>&1\"\n", "\n", - "def startmonitor(clients,appid,**kwargs):\n", + "def startmonitor(clients, appid, collect_emon, **kwargs):\n", " local_profile_dir=os.path.join(home, 'profile')\n", " prof=os.path.join(local_profile_dir, appid)\n", " !mkdir -p {prof}\n", @@ -323,6 +340,11 @@ " \n", " killsar(clients)\n", " \n", + " if collect_emon:\n", + " !cp -f {emon_list} {home}/emon.list\n", + " for l in clients:\n", + " !scp {home}/emon.list {l}:{home}/emon.list > /dev/null 2>&1\n", + " \n", " perfsyscalls=kwargs.get(\"collect_perf_syscall\",None)\n", " \n", " for l in clients:\n", @@ -331,13 +353,19 @@ " !ssh {l} mkdir -p {prof_client}\n", " !ssh {l} \"sar -o {prof_client}/sar.bin -r -u -d -B -n DEV 1 >/dev/null 2>&1 &\"\n", " !ssh root@{l} \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f 1 | xargs -I % bash -c '(cat /proc/%/status >> {prof_client}/%.stat; cat /proc/%/io >> {prof_client}/%.stat)'\"\n", + " if collect_emon:\n", + " !ssh {l} \"emon -i {home}/emon.list -f {prof_client}/emon.rst >/dev/null 2>&1 & \"\n", + " else:\n", + " !ssh root@{l} \"perf stat -e 'instructions,cycles,cpu_clk_unhalted.thread,cpu_clk_unhalted.ref_tsc' -a -I 500 -o {prof_client}/perfstat.txt >/dev/null 2>&1 & \"\n", + " !ssh {l} \"cat /sys/devices/system/cpu/cpu0/tsc_freq_khz | xargs -I% echo %000 > {prof_client}/tsc_freq 2>/dev/null &\"\n", + " !ssh {l} \"lscpu | grep '^CPU(s):' | cut -d ':' -f 2 | tr -d ' ' > {prof_client}/totalcores 2>/dev/null &\"\n", " if kwargs.get(\"collect_pid\",False):\n", " !ssh {l} \"jps | grep CoarseGrainedExecutorBackend | head -n 1 | cut -d' ' -f 1 | xargs -I % pidstat -h -t -p % 1 > {prof_client}/pidstat.out 2>/dev/null &\"\n", " !ssh root@{l} 'cat /proc/uptime | cut -d\" \" -f 1 | xargs -I ^ date -d \"- ^ seconds\" +%s.%N' > $prof/$l/uptime.txt\n", " if kwargs.get(\"collect_sched\",False):\n", " !ssh 
root@{l} 'perf trace -e \"sched:sched_switch\" -C 8-15 -o {prof_client}/sched.txt -T -- sleep 10000 >/dev/null 2>/dev/null &'\n", " if perfsyscalls is not None:\n", - " !ssh root@{l} \"perf stat -e 'syscalls:sys_exit_poll,syscalls:sys_exit_epoll_wait' -a -I 1000 -o {prof_client}/perfstat.txt >/dev/null 2>&1 & \"\n", + " !ssh root@{l} \"perf stat -e 'syscalls:sys_exit_poll,syscalls:sys_exit_epoll_wait' -a -I 1000 -o {prof_client}/perfsyscalls.txt >/dev/null 2>&1 & \"\n", " if kwargs.get(\"collect_hbm\",False):\n", " hbm_nodes = kwargs.get(\"hbm_nodes\")\n", " if hbm_nodes is not None:\n", @@ -350,9 +378,8 @@ " !ssh $hbm_l \"while :; do echo \\$(numactl -H | grep '$hbm_numa_nodes' | grep 'size' | awk '{ print \\$4 }' | awk '{ s += \\$1 } END { print s }'), \\$(numactl -H | grep '$hbm_numa_nodes' | grep 'free' | awk '{ print \\$4 }' | awk '{ s += \\$1 } END { print s }') | ts '%Y-%m-%d %H:%M:%S,' >> $hbm_prof/$hbm_l/numactl.csv; sleep 1; done >/dev/null 2>&1 &\"\n", " else:\n", " print(\"Missing argument: hbm_nodes. e.g. 
hbm_nodes = list(range(8,16))\")\n", - " return prof\n", "\n", - "def stopmonitor(clients, sc, appid, **kwargs):\n", + "def stopmonitor(clients, sc, appid, result, collect_emon, **kwargs):\n", " %cd ~\n", " \n", " local_profile_dir=os.path.join(home, 'profile')\n", @@ -360,23 +387,31 @@ " !mkdir -p {prof}\n", "\n", " killsar(clients)\n", - " killnumactl(clients) \n", - " \n", - " with open(f\"{prof}/starttime\",\"w\") as f:\n", - " f.write(\"{:d}\".format(int(time.time()*1000)))\n", + " killnumactl(clients)\n", " \n", " for l in clients:\n", " prof_client=os.path.join(prof, l)\n", " !ssh {l} \"sar -f {prof_client}/sar.bin -r > {prof_client}/sar_mem.sar;sar -f {prof_client}/sar.bin -u > {prof_client}/sar_cpu.sar;sar -f {prof_client}/sar.bin -d -p > {prof_client}/sar_disk.sar;sar -f {prof_client}/sar.bin -n DEV > {prof_client}/sar_nic.sar;sar -f {prof_client}/sar.bin -B > {prof_client}/sar_page.sar;\" \n", " !ssh root@{l} \"jps | grep CoarseGrainedExecutorBackend | cut -d' ' -f 1 | xargs -I % bash -c '(cat /proc/%/status >> {prof_client}/%.stat; cat /proc/%/io >> {prof_client}/%.stat)'\"\n", + " if collect_emon:\n", + " !ssh {l} \"source ~/sep_install/sep_vars.sh>/dev/null 2>&1; emon -v \" > {prof}/{l}/emonv.txt\n", " !ssh {l} \"sar -V \" > {prof_client}/sarv.txt\n", - " !test -f {prof_client}/perfstat.txt && head -n 1 {prof_client}/perfstat.txt > {prof_client}/perfstarttime\n", + " !ssh {l} \"test -f {prof_client}/perfstat.txt && head -n 1 {prof_client}/perfstat.txt > {prof_client}/perfstarttime\"\n", " if l!= socket.gethostname():\n", " !scp -r {l}:{prof_client} {prof}/ > /dev/null 2>&1\n", " \n", " if sc is not None:\n", " sc.stop()\n", + " \n", + " !git --git-dir=\"{gluten_home}/.git\" log --format=\"commit %H%nAuthor: %an <%ae>%nDate: %cs%n %n %s %n\" --since=`date --date='2 days ago' +'%m/%d/%Y'` > {prof}/changelog_gluten\n", + " !git --git-dir=\"{gluten_home}/ep/build-velox/build/velox_ep/.git\" log --format=\"commit %H%nAuthor: %an <%ae>%nDate: %cs%n %n %s 
%n\" --since=`date --date='2 days ago' +'%m/%d/%Y'` > {prof}/changelog_velox\n", + " \n", + " with open(f\"{prof}/starttime\",\"w\") as f:\n", + " f.write(\"{:d}\".format(int(time.time()*1000)))\n", " \n", + " with open(f'{prof}/query_time.json', 'w') as f:\n", + " json.dump(result, f)\n", + "\n", " if hdfs_event_dir != '':\n", " !hadoop fs -copyToLocal {hdfs_event_dir}/{appid} {prof}/app.log\n", " elif local_event_dir != '':\n", @@ -731,6 +766,32 @@ " return etc_gmt" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_last_run(records_file, appid=''):\n", + " if os.path.exists(records_file):\n", + " if appid:\n", + " lines=!tail -n2 $records_file\n", + " if len(lines) == 2:\n", + " # Check appid match\n", + " last_appid = lines[1].split('\\t')[1]\n", + " if last_appid != appid:\n", + " print(f'appid not match. Required {appid}. Got {last_appid}')\n", + " else:\n", + " l=lines[0].split('\\t')\n", + " return l[1],l[2],l[3]\n", + " else:\n", + " lines=!tail -n1 $records_file\n", + " if len(lines) == 1:\n", + " l=lines[0].split('\\t')\n", + " return l[1],l[2],l[3]\n", + " return None, None, None" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -765,10 +826,16 @@ " tpctables=[]\n", " tpc_query_path = ''\n", " \n", + " RECORDS_SPARK_TPCH = f\"records_spark_tpch.csv\"\n", + " RECORDS_SPARK_TPCDS = f\"records_spark_tpcds.csv\"\n", + " RECORDS_GLUTEN_TPCH = f\"records_gluten_tpch.csv\"\n", + " RECORDS_GLUTEN_TPCDS = f\"records_gluten_tpcds.csv\"\n", + " \n", " def __init__(self, spark, table_dir, run_gluten, workload, server, base_dir, nb_name, data_source = 'parquet'):\n", " self.spark = spark\n", " self.sc = spark.sparkSession.sparkContext\n", " self.appid = self.sc.applicationId\n", + " self.app_name = '_'.join(self.sc.appName.split(' '))\n", " self.run_gluten = run_gluten\n", " self.workload = workload\n", " self.table_dir = table_dir\n", @@ -778,7 +845,9 @@ " self.data_source = data_source\n", " 
self.table_loaded = False\n", " self.result = {}\n", + " self.duration = 0\n", " self.stopped = False\n", + " self.collect_emon = False\n", " self.perf_html = ''\n", " self.finished_nb = ''\n", " for l in os.listdir(self.tpc_query_path):\n", @@ -788,40 +857,65 @@ " self.query_ids = sorted(self.query_infos.keys(), key=lambda x: str(len(x))+x if x[-1] != 'a' and x[-1] != 'b' else str(len(x)-1) + x)\n", " print(\"http://{}:18080/history/{}/jobs/\".format(local_ip, self.sc.applicationId))\n", " \n", - " def start_monitor(self, clients, **kw):\n", - " startmonitor(clients, self.appid, **kw)\n", + " def start_monitor(self, clients, emon_list='', **kw):\n", + " if emon_list:\n", + " self.collect_emon = True\n", + " startmonitor(clients, self.appid, self.collect_emon, **kw)\n", " \n", " def stop_monitor(self, clients, **kw):\n", " if self.stopped:\n", " return\n", - " stopmonitor(clients, self.sc, self.appid, **kw)\n", - " if self.server:\n", - " output_nb = f'{self.nb_name[:-6]}-{self.appid}.ipynb'\n", - " if output_nb.startswith(home):\n", - " output_nb_name = os.path.relpath(output_nb, home)\n", + " stopmonitor(clients, self.sc, self.appid, self.result, self.collect_emon, **kw)\n", + "\n", + " output_nb = f'{self.nb_name[:-6]}-{self.appid}.ipynb'\n", + " \n", + " record_file = ''\n", + " if self.workload == 'tpch':\n", + " if self.run_gluten:\n", + " record_file = self.RECORDS_GLUTEN_TPCH\n", + " else:\n", + " record_file = self.RECORDS_SPARK_TPCH\n", + " else:\n", + " if self.run_gluten:\n", + " record_file = self.RECORDS_GLUTEN_TPCDS\n", " else:\n", - " output_nb_name = output_nb\n", - " output_nb_dir = os.path.dirname(output_nb_name)\n", - " server_nb_dir = os.path.join('PAUS', self.base_dir, output_nb_dir)\n", - " !ssh {self.server} \"mkdir -p {server_nb_dir}\"\n", - " !scp {output_nb} {self.server}:{server_nb_dir}\n", - " self.finished_nb = f\"http://{self.server}:8888/tree/{self.base_dir}/{output_nb_name}\"\n", + " record_file = self.RECORDS_SPARK_TPCDS\n", + " 
record_file = os.path.join(cwd, record_file)\n", + " with open(record_file, 'a+') as f:\n", + " f.write(f'{datetime.now()}\\t{self.appid}\\t{self.base_dir}\\t{self.app_name}\\t{output_nb}\\t{self.duration}')\n", + "\n", + " if self.server:\n", + " if output_nb.startswith(cwd):\n", + " output_nb = os.path.relpath(output_nb, cwd)\n", + " self.finished_nb = f\"http://{localhost}:8888/tree/{output_nb}\"\n", + " upload_profile(self.server, self.base_dir, self.appid)\n", + " \n", " self.stopped = True\n", "\n", - " def run_perf_analysis(self, disk_dev, nic_dev):\n", + " def run_perf_analysis(self, disk_dev, nic_dev, proxy, emails):\n", " if not self.server:\n", " return\n", "\n", - " upload_profile(self.server, self.base_dir, self.appid)\n", - "\n", - " ts=time.strftime(\"%Y_%m_%d_%H%M%S\")\n", - " name=f'{self.workload}_gluten' if self.run_gluten else f'{self.workload}_vanilla'\n", " run_script=f'{gluten_home}/tools/workload/benchmark_velox/analysis/run_perf_analysis.sh'\n", " \n", " disk=','.join(disk_dev)\n", " nic=','.join(nic_dev)\n", "\n", - " command =' '.join(['bash', run_script, '--ts', ts, '--base-dir', self.base_dir, '--name', name, '--appid', self.appid, '--disk', disk, '--nic', nic, '--tz', convert_to_etc_gmt()])\n", + " command =' '.join(['bash', run_script, '--base-dir', self.base_dir, '--name', self.app_name, '--appid', self.appid, '--disk', disk, '--nic', nic, '--tz', convert_to_etc_gmt(), '--proxy', proxy if proxy != '' else \"''\", '--emails', ','.join(emails) if emails else \"''\"])\n", + " \n", + " if self.run_gluten:\n", + " if self.workload == 'tpch':\n", + " comp_file = os.path.join(cwd, self.RECORDS_GLUTEN_TPCH)\n", + " baseline_file = os.path.join(cwd, self.RECORDS_SPARK_TPCH)\n", + " else:\n", + " comp_file = os.path.join(cwd, self.RECORDS_GLUTEN_TPCDS)\n", + " baseline_file = os.path.join(cwd, self.RECORDS_SPARK_TPCDS)\n", + " comp_appid, comp_base_dir, comp_name = get_last_run(comp_file, self.appid)\n", + " if comp_appid:\n", + " command += ' 
'.join(['', '--comp-appid', comp_appid, '--comp-base-dir', comp_base_dir, '--comp-name', comp_name])\n", + " baseline_appid, baseline_base_dir, _ = get_last_run(baseline_file, '')\n", + " if baseline_appid:\n", + " command += ' '.join(['', '--baseline-appid', baseline_appid, '--baseline-base-dir', baseline_base_dir])\n", " print(command)\n", "\n", " # Block if running on local cluster.\n", @@ -830,7 +924,7 @@ " else:\n", " !ssh {self.server} \"{command} > /dev/null 2>&1 &\"\n", "\n", - " self.perf_html=f'http://{self.server}:8888/view/{self.base_dir}/html/{ts}_{name}_{self.appid}.html'\n", + " self.perf_html=f'http://{self.server}:8889/view/{self.base_dir}/html/{self.app_name}_{self.appid}.html'\n", " display(HTML(f'{self.perf_html}'))\n", " \n", " def load_table(self, table):\n", @@ -869,6 +963,7 @@ " display(HTML(('Completed Query. Time(sec): {:f}'.format(duration))))\n", " \n", " self.result[query] = duration\n", + " self.duration += float(duration)\n", " if print_result:\n", " print(collect)\n", "\n", @@ -881,18 +976,15 @@ " def print_result(self):\n", " print(self.result)\n", " print()\n", - " durations = [float(i) for i in self.result.values()]\n", - " print(\"total duration:\")\n", - " print(sum(durations))\n", - " print()\n", + " print(f\"total duration:\\n{self.duration}\\n\")\n", " if self.server:\n", " print(self.finished_nb)\n", " print(f\"http://{self.server}:1088/tracing_examples/trace_viewer.html#/tracing/test_data/{self.appid}.json\")\n", " print(f\"http://{self.server}:18080/history/{self.appid}\")\n", " print(self.perf_html)\n", " print(self.appid)\n", - " for i in durations:\n", - " print(i)\n", + " for t in self.result.values():\n", + " print(t)\n", " \n", "class TestTPCH(TestTPC):\n", " tpctables = ['customer', 'lineitem', 'nation', 'orders', 'part', 'partsupp', 'region', 'supplier']\n", @@ -998,7 +1090,7 @@ " if run_gluten:\n", " offheap_ratio = gluten_offheap_ratio\n", " else:\n", - " offheap_ratio = vanilla_offheap_ratio\n", + " 
offheap_ratio = spark_offheap_ratio\n", " driver_memory = convert_to_bytes('20g')\n", " executor_memory_overhead = convert_to_bytes('1g')\n", " \n", @@ -1126,7 +1218,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Vanilla Spark" + "## Spark" ] }, { @@ -1135,10 +1227,10 @@ "metadata": {}, "outputs": [], "source": [ - "def vanilla_tpch_conf_overwrite(conf):\n", + "def spark_tpch_conf_overwrite(conf):\n", " return conf\n", "\n", - "def vanilla_tpcds_conf_overwrite(conf):\n", + "def spark_tpcds_conf_overwrite(conf):\n", " conf.set('spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold', '0')\\\n", " .set('spark.sql.optimizer.runtime.bloomFilter.enabled', 'true')\n", " return conf" @@ -1150,7 +1242,7 @@ "metadata": {}, "outputs": [], "source": [ - "def create_cntx_vanilla(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name='', master='yarn', conf_overwrite=None):\n", + "def create_cntx_spark(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name='', master='yarn', conf_overwrite=None):\n", " conf = default_conf(executors_per_node, cores_per_executor, task_per_core, memory_per_node, extra_jars, app_name, master, run_gluten=False)\n", " conf.set(\"spark.sql.execution.arrow.maxRecordsPerBatch\",20480)\\\n", " .set(\"spark.sql.parquet.columnarReaderBatchSize\",20480)\\\n", @@ -1229,12 +1321,12 @@ "\n", " if workload.lower() == 'tpch':\n", " if not app_name:\n", - " app_name = 'tpch_power'\n", + " app_name = f\"tpch_spark{''.join(spark_version.split('.'))}\"\n", " tabledir = tpch_tabledir\n", " is_tpch_workload=True\n", " elif workload.lower() == 'tpcds':\n", " if not app_name:\n", - " app_name = 'tpcds_power'\n", + " app_name = f\"tpcds_spark{''.join(spark_version.split('.'))}\"\n", " tabledir = tpcds_tabledir\n", " is_tpcds_workload=True\n", " else:\n", @@ -1276,14 +1368,14 @@ " task_per_core = gluten_tpcds_task_per_core\n", " workload_conf_overwrite = 
gluten_tpcds_conf_overwrite\n", " else:\n", - " app_name = ' '.join(['vanilla', app_name, lastgit[:6]])\n", - " create_cntx_func=create_cntx_vanilla\n", + " app_name = ' '.join(['spark', app_name, lastgit[:6]])\n", + " create_cntx_func=create_cntx_spark\n", " if is_tpch_workload:\n", - " task_per_core = vanilla_tpch_task_per_core\n", - " workload_conf_overwrite = vanilla_tpch_conf_overwrite\n", + " task_per_core = spark_tpch_task_per_core\n", + " workload_conf_overwrite = spark_tpch_conf_overwrite\n", " elif is_tpcds_workload:\n", - " task_per_core = vanilla_tpcds_task_per_core\n", - " workload_conf_overwrite = vanilla_tpcds_conf_overwrite\n", + " task_per_core = spark_tpcds_task_per_core\n", + " workload_conf_overwrite = spark_tpcds_conf_overwrite\n", " \n", " conf_overwrite = lambda conf: app_conf_overwrite(workload_conf_overwrite(conf))\n", " \n", diff --git a/tools/workload/benchmark_velox/params.yaml.template b/tools/workload/benchmark_velox/params.yaml.template index 1c70e428bc99..73e02b728f7b 100644 --- a/tools/workload/benchmark_velox/params.yaml.template +++ b/tools/workload/benchmark_velox/params.yaml.template @@ -20,22 +20,10 @@ disk_dev: nic_dev: - ens787f0 -# Hostname or IP to server for perf analysis. Able to connect via ssh. -server: '' - -# Specify the directory on perf analysis server. Usually a codename for this run. -base_dir: emr - -# Proxy used to connect to server for perf analysis. -proxy: '' - -# Whether to upload profile to perf analysis server and run perf analysis scripts. Only takes effect if server is set. -analyze_perf: True - # Select workload. Can be either 'tpch' or 'tpcds'. workload: tpch -# Run with gluten. If False, run vanilla Spark. +# Run with gluten. If False, run Spark. 
run_gluten: True # TPC tables @@ -48,20 +36,20 @@ cores_per_executor: 8 gluten_tpch_task_per_core: 2 gluten_tpcds_task_per_core: 2 -vanilla_tpch_task_per_core: 4 -vanilla_tpcds_task_per_core: 4 +spark_tpch_task_per_core: 4 +spark_tpcds_task_per_core: 4 # Physical memory on each worker node. memory_per_node: 1000g -# Offheap ratio. 0 to disable offheap for vanilla Spark. +# Offheap ratio. 0 to disable offheap for Spark. # onheap:offheap = 1:2 -vanilla_offheap_ratio: 2.0 +spark_offheap_ratio: 2.0 # onheap:offheap = 1:7 gluten_offheap_ratio: 7.0 # spark.io.compression.codec -vanilla_codec: lz4 +spark_codec: lz4 # spark.gluten.sql.columnar.shuffle.codec gluten_codec: lz4 # spark.gluten.sql.columnar.shuffle.codecBackend @@ -69,3 +57,22 @@ gluten_codec_backend: '' # spark.gluten.sql.columnar.maxBatchSize max_batch_size: 4096 +# Hostname or IP to server for perf analysis. Able to connect via ssh. +server: '' + +# Specify the directory on perf analysis server. Usually a codename for this run. +base_dir: test + +# Proxy used to connect to server for perf analysis. +proxy: '' + +# Emon event file for `emon -i`. Set to an empty string '' if emon is unavailable. +# Supported emon events on platform can be verified via `emon -i emon.list` +emon_list: /home/sparkuser/ipython/emon.list + +# Whether to run perf analysis scripts. Only takes effect if server is set. +analyze_perf: True + +# List of emails to receive perf analysis results. 
+emails: + - diff --git a/tools/workload/benchmark_velox/sample/Trace-viewer.png b/tools/workload/benchmark_velox/sample/Trace-viewer.png new file mode 100644 index 000000000000..eef24863c918 Binary files /dev/null and b/tools/workload/benchmark_velox/sample/Trace-viewer.png differ diff --git a/tools/workload/benchmark_velox/sample/tpch_q1.html b/tools/workload/benchmark_velox/sample/tpch_q1.html new file mode 100644 index 000000000000..c401c35ec87c --- /dev/null +++ b/tools/workload/benchmark_velox/sample/tpch_q1.html @@ -0,0 +1,16401 @@ + + + + + +2024_12_06_055328_tpch_gluten_application_1733153225851_0048.nbconvert + + + + + + + + + + +
+
+
+
+
+

Parameters

+
+
+
+
+
+
+
+
+
+

start analysis cluster and run

+
+
+
+
+
+
+
+
+
+
+
+
+
Setting default log level to "WARN".
+To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
+
+
+
+
+
+
24/12/06 05:53:36 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
+
+
+
+
+
+
24/12/06 05:53:37 WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
+24/12/06 05:53:37 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
+
+
+
+
+
+
/home/sparkuser/spark/python/pyspark/sql/context.py:112: FutureWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
+  warnings.warn(
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Sparklog

+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+

Content

+
+
+
+ +
+
+

Self app info

+
+
+
+
+
+
+
+
+
+
+
load data  /sr213/application_1733153225851_0048/app.log
+
+
+
+
+
+
[Stage 0:>                                                          (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 1:>                                                          (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 4:>                                                          (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 5:>                                                          (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 17:>                                                         (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 39:>                                                      (0 + 16) / 200]

[Stage 39:=>                                                     (6 + 16) / 200]
+
+
+
+
+
[Stage 39:===>                                                  (14 + 16) / 200]
+
+
+
+
+
[Stage 39:========>                                             (33 + 16) / 200]

[Stage 39:==================>                                   (67 + 16) / 200]
+
+
+
+
+
[Stage 39:==========================>                           (97 + 16) / 200]

[Stage 39:==================================>                  (131 + 16) / 200]
+
+
+
+
+
[Stage 39:=================================================>   (185 + 15) / 200]

                                                                                
+
+
+ +
+
+
[Stage 42:(177 + 5) / 200][Stage 43:>   (0 + 1) / 1][Stage 44:>(0 + 11) / 200]

                                                                                
+
+
+
+
+
[Stage 44:(113 + 12) / 200][Stage 45:>   (0 + 1) / 1][Stage 46:> (0 + 3) / 200]

[Stage 44:(182 + 5) / 200][Stage 46:>(4 + 11) / 200][Stage 47:> (0 + 0) / 200]
+
+
+
+
+
[Stage 46:(43 + 16) / 200][Stage 47:> (0 + 0) / 200][Stage 48:> (0 + 0) / 200]

[Stage 46:(110 + 8) / 200][Stage 47:> (0 + 8) / 200][Stage 48:> (0 + 0) / 200]
+
+
+
+
+
[Stage 46:(155 + 8) / 200][Stage 47:>(47 + 8) / 200][Stage 48:> (0 + 0) / 200]

[Stage 46:(194 + 4) / 200][Stage 47:>(73 + 4) / 200][Stage 48:> (8 + 8) / 200]
+
+
+
+
+
[Stage 47:(114 + 8) / 200][Stage 48:>(57 + 4) / 200][Stage 49:>  (0 + 4) / 16]

[Stage 47:(185 + 4) / 200][Stage 48:>(73 + 4) / 200][Stage 49:>  (0 + 8) / 16]
+
+
+
+
+
[Stage 48:(126 + 8) / 200][Stage 49:>  (0 + 8) / 16][Stage 51:>   (0 + 0) / 1]

[Stage 48:(184 + 4) / 200][Stage 49:> (4 + 12) / 16][Stage 51:>   (0 + 0) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
appidapplication_1733153225851_0048
executor.instances4
executor.cores4
shuffle.partitions32
batch size4,096
real executors4
Failed Tasks
Speculative Tasks0
Speculative Killed Tasks0
Speculative Stage0
runtime17.65
disk spilled0.0
memspilled0.0
local_read0.0
remote_read0.0
shuffle_write0.0
task run time6.79
ser_time0.0
f_wait_time0.0
gc_time0.03
input read22.54
acc_task_time13.99
file read size5,951.35
file write size24.52
disk read size5.05
disk write size15.31
disk cancel size0.0
+
+
+
+
+
{'appid': 'application_1733153225851_0048',
+ 'executor.instances': 4,
+ 'executor.cores': 4,
+ 'shuffle.partitions': 32,
+ 'batch size': 4096,
+ 'real executors': 4,
+ 'Failed Tasks': '',
+ 'Speculative Tasks': 0,
+ 'Speculative Killed Tasks': 0,
+ 'Speculative Stage': 0,
+ 'runtime': 17.65,
+ 'disk spilled': 0.0,
+ 'memspilled': 0.0,
+ 'local_read': 0.0,
+ 'remote_read': 0.0,
+ 'shuffle_write': 0.0,
+ 'task run time': 6.79,
+ 'ser_time': 0.0,
+ 'f_wait_time': 0.0,
+ 'gc_time': 0.03,
+ 'input read': 22.54,
+ 'acc_task_time': 13.99,
+ 'file read size': 5951.35,
+ 'file write size': 24.52,
+ 'disk read size': 5.05,
+ 'disk write size': 15.31,
+ 'disk cancel size': 0.0}
+
+
+
+
+
+
+
+
+
+
+
[Stage 92:(161 + 4) / 200][Stage 93:>(68 + 9) / 200][Stage 94:> (8 + 4) / 200]

[Stage 93:(151 + 4) / 200][Stage 94:>(66 + 8) / 200][Stage 95:>  (1 + 4) / 16]
+
+
+
+
+
                                                                                
+
+
+
+
+
/sr213/application_1733153225851_0048/sr217/emon.parquet is not found, trying to load data ...
+
+
+
+
+
+
[Stage 129:>                (0 + 2) / 2][Stage 130:>                (0 + 2) / 2]

[Stage 129:>  (0 + 2) / 2][Stage 130:>  (0 + 2) / 2][Stage 131:>  (0 + 4) / 4]
+
+
+
+
+
[Stage 129:=> (1 + 1) / 2][Stage 130:=> (1 + 1) / 2][Stage 131:>  (1 + 3) / 4]

[Stage 129:========>        (1 + 1) / 2][Stage 131:====>            (1 + 3) / 4]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 143:==>                                                  (16 + 16) / 400]

[Stage 143:==>                                                  (17 + 16) / 400]
+
+
+
+
+
[Stage 143:===>                                                 (24 + 16) / 400]

[Stage 143:====>                                                (34 + 16) / 400]
+
+
+
+
+
[Stage 143:=====>                                               (43 + 16) / 400]

[Stage 143:=======>                                             (53 + 16) / 400]
+
+
+
+
+
[Stage 143:========>                                            (65 + 16) / 400]

[Stage 143:=========>                                           (72 + 16) / 400]
+
+
+
+
+
[Stage 143:==========>                                          (83 + 16) / 400]

[Stage 143:===========>                                         (90 + 16) / 400]
+
+
+
+
+
[Stage 143:=============>                                       (99 + 16) / 400]

[Stage 143:=============>                                      (106 + 16) / 400]
+
+
+
+
+
[Stage 143:==============>                                     (113 + 16) / 400]

[Stage 143:===============>                                    (118 + 16) / 400]
+
+
+
+
+
[Stage 143:================>                                   (126 + 16) / 400]

[Stage 143:=================>                                  (132 + 16) / 400]
+
+
+
+
+
[Stage 143:==================>                                 (140 + 16) / 400]

[Stage 143:==================>                                 (146 + 16) / 400]
+
+
+
+
+
[Stage 143:===================>                                (153 + 16) / 400]

[Stage 143:====================>                               (160 + 16) / 400]
+
+
+
+
+
[Stage 143:======================>                             (170 + 16) / 400]

[Stage 143:======================>                             (173 + 16) / 400]
+
+
+
+
+
[Stage 143:=======================>                            (182 + 16) / 400]

[Stage 143:========================>                           (186 + 16) / 400]
+
+
+
+
+
[Stage 143:=========================>                          (194 + 16) / 400]

[Stage 143:=========================>                          (197 + 16) / 400]
+
+
+
+
+
[Stage 143:==========================>                         (204 + 16) / 400]

[Stage 143:===========================>                        (211 + 16) / 400]
+
+
+
+
+
[Stage 143:===========================>                        (214 + 16) / 400]

[Stage 143:============================>                       (222 + 16) / 400]
+
+
+
+
+
[Stage 143:=============================>                      (230 + 16) / 400]

[Stage 143:==============================>                     (236 + 16) / 400]
+
+
+
+
+
[Stage 143:===============================>                    (243 + 16) / 400]

[Stage 143:================================>                   (249 + 16) / 400]
+
+
+
+
+
[Stage 143:=================================>                  (256 + 16) / 400]

[Stage 143:==================================>                 (263 + 16) / 400]
+
+
+
+
+
[Stage 143:===================================>                (272 + 16) / 400]

[Stage 143:====================================>               (279 + 16) / 400]
+
+
+
+
+
[Stage 143:======================================>             (294 + 16) / 400]

[Stage 143:======================================>             (299 + 16) / 400]
+
+
+
+
+
[Stage 143:========================================>           (311 + 16) / 400]

[Stage 143:=========================================>          (322 + 16) / 400]
+
+
+
+
+
[Stage 143:===========================================>        (333 + 17) / 400]

[Stage 143:=============================================>      (348 + 16) / 400]
+
+
+
+
+
[Stage 143:==============================================>     (360 + 16) / 400]

[Stage 143:================================================>   (372 + 16) / 400]
+
+
+
+
+
[Stage 143:==================================================> (386 + 14) / 400]
+
+
+
+
+
[Stage 148:>                                                        (0 + 1) / 1]
+
+
+
+
+
[Stage 154:>                                                        (0 + 3) / 3]
+
+
+
+
+
[Stage 154:===================>                                     (1 + 2) / 3]

[Stage 154:======================================>                  (2 + 1) / 3]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 157:>                                                        (0 + 3) / 3]

[Stage 157:===================>                                     (1 + 2) / 3]
+
+
+
+
+
[Stage 157:======================================>                  (2 + 1) / 3]

                                                                                
+
+
+
+
+
sar metric
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 application_1733153225851_0048
runtime17.650000
disk spilled0.000000
shuffle_write0.000000
f_wait_time0.000000
input read22.540000
acc_task_time13.990000
output rows1.180000
%user>90%0.931034
%kernel>10%0.965517
%iowait>10%0.620690
avg %user41.216207
avg %system4.514138
avg %iowait0.743793
avg disk util32.206897
time more than 90%0.000000
total read (G)5.388613
total write (G)1.121773
avg read bw (MB/s)190.273771
avg write bw (MB/s)39.610183
read bw %75411.578125
read bw %95484.542969
read bw max510.351562
time_rd_morethan_950.034483
write bw %751.074219
write bw %95165.687500
write bw max812.511719
time_wr_morethan_950.034483
cached mean93.896552
cached 75%145.000000
cached max188.000000
used mean834.000000
used 75%852.000000
used max859.000000
rx MB/s 75%0.000000
rx MB/s 95%0.000000
rx MB/s 99%0.000000
pgin mean190.206897
pgin 75%412.000000
pgin max509.000000
pgout mean40.965517
pgout 75%1.000000
pgout max840.000000
fault mean117653.310345
fault 75%205151.000000
fault max256538.000000
cpu%_avg0.448817
cpu freq_avg3241.915617
pathlength_sum1933.000000
ipc_avg1.137983
+
+
+
+
+
+
+
+
+
+
+
[Stage 330:>                                                        (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 341:>                                                        (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
DEV in ('nvme0n1')
+
+
+
+
+
+
[Stage 388:>                                                        (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 396:>                                                        (0 + 1) / 1]
+
+
+
+
+
                                                                                
+
+
+
+
+
{'sr217': 200}
+
+
+
+
+
[Stage 490:===================>                                     (1 + 2) / 3]

                                                                                
+
+
+ +
+
+
+
+
+
+
+
gluten tpch_power 6600a1
+
+
+
+
+
+
+
+
+
+
[Stage 605:==>                                                   (11 + 0) / 200]

[Stage 531:(174 + 16) / 200][Stage 532:>(0 + 0) / 200][Stage 533:>(0 + 0) / 200]
+
+
+
+
+
[Stage 532:(102 + 16) / 200][Stage 533:>(0 + 0) / 200][Stage 534:>(0 + 0) / 200]

[Stage 533:(72 + 16) / 200][Stage 534:>(0 + 0) / 200][Stage 535:>(0 + 0) / 200]
+
+
+
+
+
[Stage 534:(63 + 16) / 200][Stage 535:>(0 + 0) / 200][Stage 536:>(0 + 0) / 200]

[Stage 535:(75 + 16) / 200][Stage 536:>(0 + 0) / 200][Stage 537:>(0 + 0) / 200]
+
+
+
+
+
[Stage 536:(102 + 17) / 200][Stage 537:>(0 + 0) / 200][Stage 538:>(3 + 0) / 200]

[Stage 537:(114 + 16) / 200][Stage 538:>(3 + 0) / 200][Stage 539:>(0 + 0) / 200]
+
+
+
+
+
[Stage 538:(105 + 16) / 200][Stage 539:>(0 + 0) / 200][Stage 540:>(0 + 0) / 200]

[Stage 539:(67 + 16) / 200][Stage 540:>(0 + 0) / 200][Stage 541:>(0 + 0) / 200]
+
+
+
+
+
[Stage 540:(59 + 16) / 200][Stage 541:>(0 + 0) / 200][Stage 542:>(0 + 0) / 200]

[Stage 541:(104 + 16) / 200][Stage 542:>(0 + 0) / 200][Stage 543:>(0 + 0) / 200]
+
+
+
+
+
[Stage 542:(115 + 16) / 200][Stage 543:>(0 + 0) / 200][Stage 544:>(0 + 0) / 200]

[Stage 543:(148 + 16) / 200][Stage 544:>(0 + 0) / 200][Stage 545:>(0 + 0) / 200]
+
+
+
+
+
[Stage 545:(0 + 16) / 200][Stage 546:>(0 + 0) / 200][Stage 547:>(0 + 0) / 200]

[Stage 546:(38 + 16) / 200][Stage 547:>(0 + 0) / 200][Stage 548:>(0 + 0) / 200]
+
+
+
+
+
[Stage 547:(58 + 17) / 200][Stage 548:>(0 + 0) / 200][Stage 549:>(0 + 0) / 200]

[Stage 548:(94 + 16) / 200][Stage 549:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]
+
+
+
+
+
[Stage 549:(113 + 17) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]

[Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]
+
+
+
+
+
[Stage 551:(16 + 1) / 200][Stage 553:>(0 + 0) / 200][Stage 555:>(0 + 0) / 200]

[Stage 551:(179 + 16) / 200][Stage 553:>(0 + 0) / 200][Stage 555:>(0 + 0) / 200]
+
+
+
+
+
[Stage 553:(16 + 0) / 200][Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200]

[Stage 553:(54 + 17) / 200][Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200]
+
+
+
+
+
[Stage 555:>(8 + 8) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]

[Stage 555:(16 + 0) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]
+
+
+
+
+
[Stage 555:(199 + 1) / 200][Stage 557:(0 + 15) / 200][Stage 559:>(0 + 0) / 200]

[Stage 557:(16 + 0) / 200][Stage 559:>(0 + 0) / 200][Stage 560:>(0 + 0) / 200]
+
+
+
+
+
[Stage 557:(84 + 16) / 200][Stage 559:>(0 + 0) / 200][Stage 560:>(0 + 0) / 200]

[Stage 559:(16 + 0) / 200][Stage 560:>(0 + 0) / 200][Stage 564:>(0 + 0) / 200]
+
+
+
+
+
[Stage 559:(29 + 16) / 200][Stage 560:>(0 + 0) / 200][Stage 564:>(0 + 0) / 200]

[Stage 560:(85 + 16) / 200][Stage 564:>(0 + 0) / 200][Stage 566:>(0 + 0) / 200]
+
+
+
+
+
[Stage 564:>(0 + 0) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]
+
+
+
+
+
[Stage 564:(16 + 0) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]
+
+
+
+
+
[Stage 564:(149 + 16) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]

[Stage 566:(16 + 0) / 200][Stage 568:>(0 + 0) / 200][Stage 569:>(0 + 0) / 200]
+
+
+
+
+
[Stage 566:(71 + 16) / 200][Stage 568:>(0 + 0) / 200][Stage 569:>(0 + 0) / 200]

[Stage 568:(16 + 0) / 200][Stage 569:>(0 + 0) / 200][Stage 573:>(0 + 0) / 200]
+
+
+
+
+
[Stage 568:(16 + 2) / 200][Stage 569:>(0 + 0) / 200][Stage 573:>(0 + 0) / 200]

[Stage 569:(42 + 18) / 200][Stage 573:>(0 + 0) / 200][Stage 575:>(0 + 0) / 200]
+
+
+
+
+
[Stage 573:>(0 + 0) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]
+
+
+
+
+
[Stage 573:(0 + 16) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]

[Stage 573:(16 + 0) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]
+
+
+
+
+
[Stage 573:(67 + 16) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]

[Stage 575:(16 + 0) / 200][Stage 577:>(0 + 0) / 200][Stage 578:>(0 + 0) / 200]
+
+
+
+
+
[Stage 575:(144 + 16) / 200][Stage 577:>(0 + 0) / 200][Stage 578:>(0 + 0) / 200]

[Stage 577:(16 + 0) / 200][Stage 578:>(0 + 0) / 200][Stage 581:>(0 + 0) / 200]
+
+
+
+
+
[Stage 577:(184 + 16) / 200][Stage 578:>(0 + 0) / 200][Stage 581:>(0 + 0) / 200]

[Stage 578:(197 + 3) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]
+
+
+
+
+
[Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]

[Stage 580:(58 + 16) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]
+
+
+
+
+
[Stage 580:(176 + 17) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]

[Stage 581:(195 + 5) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]
+
+
+
+
+
[Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]

[Stage 583:(69 + 17) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]
+
+
+
+
+
[Stage 583:(170 + 16) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]

[Stage 585:(16 + 0) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]
+
+
+
+
+
[Stage 585:(75 + 16) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]
+
+
+
+
+
[Stage 587:(16 + 0) / 200][Stage 589:>(0 + 0) / 200][Stage 591:>(0 + 0) / 200]
+
+
+
+
+
[Stage 587:(16 + 6) / 200][Stage 589:>(0 + 0) / 200][Stage 591:>(0 + 0) / 200]

[Stage 589:>(8 + 8) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]
+
+
+
+
+
[Stage 589:(16 + 0) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]
+
+
+
+
+
[Stage 589:(182 + 17) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]

[Stage 591:(16 + 0) / 200][Stage 593:>(0 + 0) / 200][Stage 595:>(0 + 0) / 200]
+
+
+
+
+
[Stage 591:(133 + 17) / 200][Stage 593:>(0 + 0) / 200][Stage 595:>(0 + 0) / 200]

[Stage 593:(16 + 0) / 200][Stage 595:>(0 + 0) / 200][Stage 597:>(0 + 0) / 200]
+
+
+
+
+
[Stage 593:(39 + 18) / 200][Stage 595:>(0 + 0) / 200][Stage 597:>(0 + 0) / 200]

[Stage 595:(16 + 0) / 200][Stage 597:>(0 + 0) / 200][Stage 599:>(0 + 0) / 200]
+
+
+
+
+
[Stage 597:(16 + 0) / 200][Stage 599:>(0 + 0) / 200][Stage 600:>(0 + 0) / 200]
+
+
+
+
+
[Stage 599:(0 + 16) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]

[Stage 599:(16 + 0) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]
+
+
+
+
+
[Stage 599:(169 + 16) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]

[Stage 605:==>                                                   (11 + 0) / 200]
+
+
+
+
+
[Stage 602:======>     (102 + 18) / 200][Stage 605:>             (11 + 0) / 200]

[Stage 605:==>                                                  (11 + 16) / 200]
+
+
+
+
+
[Stage 606:============================================>       (170 + 16) / 200]
+
+
+
+
+
[Stage 607:===================================>                (135 + 16) / 200]
+
+
+
+
+
                                                                                
+
+
+
+
+
[Stage 932:====================================================>(197 + 3) / 200]

                                                                                
+
+
+
+
+
+
+
+
+
+
application_1733153225851_0048
+
+ +
+
query time
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 runtimedisk spilledmemspilledlocal_readremote_readshuffle_writedeser_timerun_timeser_timef_wait_timegc_timepeak_memqueryidinput readacc_task_timestagesoutput rowsexecutorscore/exectask.cpusparallelism
real_queryid                     
117.6500000.0000000.0000000.0000000.0000000.0000000.2000006.7900000.0000000.0000000.0300001.340000822.54000013.990000[ 8 10 12 15]1.18000044132
+
+
+
+
operator count
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 01
AQEShuffleRead02
AdaptiveSparkPlan01
ColumnarExchange02
FilterExecTransformer01
FlushableHashAggregateExecTransformer01
InputAdapter02
InputIteratorTransformer02
ProjectExecTransformer02
RegularHashAggregateExecTransformer01
Scan parquet 01
ShuffleQueryStage02
SortExecTransformer01
VeloxColumnarToRow01
VeloxResizeBatches02
+
+
+
+
operator input row count
+
+
+
+ + + + + + + + + + + + + + + + + +
 1
ColumnarExchange0.000000
VeloxResizeBatches0.000000
+
+
+
+
operator output row count
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 1
ColumnarExchange0.000000
FlushableHashAggregateExecTransformer0.000000
InputIteratorTransformer0.000000
ProjectExecTransformer591.600000
RegularHashAggregateExecTransformer0.000000
Scan parquet 591.600000
SortExecTransformer0.000000
VeloxColumnarToRow0.000000
VeloxResizeBatches0.000000
+
+
+
+
+No description has been provided for this image +
+
+
+
+No description has been provided for this image +
+
+
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0
44%_time of scan and filter7.53
36%_time of project6.13
16%_not_counted2.69
3%_idle0.55
0%_time of input iterator0.06
0%_time of aggregation0.03
0%_time to append / split batches0.00
0%_time of rowConstruction0.00
0%_time to split0.00
0%_time to deserialize0.00
0%_time of sort0.00
0%_time of extraction0.00
0%_shuffle write time0.00
0%_time to convert0.00
0%_time to compress0.00
0%_time to spill0.00
0%_time to decompress0.00
+
+
+
+
+No description has been provided for this image +
+
+
+
+
+
+
+

Compare to vanilla

+
+
+
+
+
+
+
+
+
load data  /sr213/application_1733153225851_0029/app.log
+
+
+
+
+
+
emon metric
+
+
+
+
+
+
[Stage 1319:>                                                       (0 + 3) / 3]
+
+
+
+
+
[Stage 1319:==================>                                     (1 + 2) / 3]

                                                                                
+
+
+
+
+
sar metric
+
+
+
+
+
+
time breakdown
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 application_1733153225851_0029application_1733153225851_0048diff
runtime132.1417.65648.67%
shuffle_write0.000.000.00%
f_wait_time0.000.000.00%
input read22.5422.540.00%
acc_task_time128.0113.99815.01%
output rows1.791.1851.69%
%user>90%0.990.935.91%
%kernel>10%0.990.972.85%
%iowait>10%0.310.62-49.30%
avg %user82.1141.2299.21%
avg %system6.104.5135.11%
avg %iowait0.170.74-76.60%
avg disk util7.1332.21-77.85%
time more than 90%0.000.000.00%
total read (G)5.245.39-2.75%
total write (G)0.021.12-97.81%
avg read bw (MB/s)37.52190.27-80.28%
avg write bw (MB/s)0.1839.61-99.55%
read bw %7559.27411.58-85.60%
read bw %95173.05484.54-64.29%
read bw max236.70510.35-53.62%
time_rd_morethan_950.050.0341.96%
write bw %750.071.07-93.45%
write bw %951.23165.69-99.25%
write bw max1.70812.51-99.79%
time_wr_morethan_950.000.03-100.00%
cached mean88.3393.90-5.93%
cached 75%132.00145.00-8.97%
cached max160.00188.00-14.89%
used mean2,060.73834.00147.09%
used 75%2,343.00852.00175.00%
used max2,346.00859.00173.11%
rx MB/s 75%0.000.000.00%
rx MB/s 95%0.000.000.00%
rx MB/s 99%0.000.000.00%
pgin mean37.37190.21-80.35%
pgin 75%59.00412.00-85.68%
pgin max352.00509.00-30.84%
pgout mean0.1340.97-99.68%
pgout 75%0.001.00-100.00%
pgout max2.00840.00-99.76%
fault mean952,586.87117,653.31709.66%
fault 75%1,426,717.00205,151.00595.45%
fault max2,628,392.00256,538.00924.56%
cpu%_avg0.880.4596.45%
cpu freq_avg3,460.223,241.926.73%
pathlength_sum17,960.001,933.00829.13%
ipc_avg1.271.1411.30%
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 runtimeshuffle_writef_wait_timeinput readacc_task_timeoutput rows
real_queryid      
1 +
132.14
+
17.65
+
648.67%
+
+
0.00
+
0.00
+
nan%
+
+
0.00
+
0.00
+
nan%
+
+
22.54
+
22.54
+
0.00%
+
+
128.01
+
13.99
+
815.01%
+
+
1.79
+
1.18
+
51.69%
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 sr217agg
0  
%user>90% +
0.99
+
0.93
+
5.91%
+
+
0.99
+
0.93
+
5.91%
+
%kernel>10% +
0.99
+
0.97
+
2.85%
+
+
0.99
+
0.97
+
2.85%
+
%iowait>10% +
0.31
+
0.62
+
-49.30%
+
+
0.31
+
0.62
+
-49.30%
+
avg %user +
82.11
+
41.22
+
99.21%
+
+
82.11
+
41.22
+
99.21%
+
avg %system +
6.10
+
4.51
+
35.11%
+
+
6.10
+
4.51
+
35.11%
+
avg %iowait +
0.17
+
0.74
+
-76.60%
+
+
0.17
+
0.74
+
-76.60%
+
avg disk util +
7.13
+
32.21
+
-77.85%
+
+
7.13
+
32.21
+
-77.85%
+
time more than 90% +
0.00
+
0.00
+
nan%
+
+
0.00
+
0.00
+
nan%
+
total read (G) +
5.24
+
5.39
+
-2.75%
+
+
5.24
+
5.39
+
-2.75%
+
total write (G) +
0.02
+
1.12
+
-97.81%
+
+
0.02
+
1.12
+
-97.81%
+
avg read bw (MB/s) +
37.52
+
190.27
+
-80.28%
+
+
37.52
+
190.27
+
-80.28%
+
avg write bw (MB/s) +
0.18
+
39.61
+
-99.55%
+
+
0.18
+
39.61
+
-99.55%
+
read bw %75 +
59.27
+
411.58
+
-85.60%
+
+
59.27
+
411.58
+
-85.60%
+
read bw %95 +
173.05
+
484.54
+
-64.29%
+
+
173.05
+
484.54
+
-64.29%
+
read bw max +
236.70
+
510.35
+
-53.62%
+
+
236.70
+
510.35
+
-53.62%
+
time_rd_morethan_95 +
0.05
+
0.03
+
41.96%
+
+
0.05
+
0.03
+
41.96%
+
write bw %75 +
0.07
+
1.07
+
-93.45%
+
+
0.07
+
1.07
+
-93.45%
+
write bw %95 +
1.23
+
165.69
+
-99.25%
+
+
1.23
+
165.69
+
-99.25%
+
write bw max +
1.70
+
812.51
+
-99.79%
+
+
1.70
+
812.51
+
-99.79%
+
time_wr_morethan_95 +
0.00
+
0.03
+
-100.00%
+
+
0.00
+
0.03
+
-100.00%
+
cached mean +
88.33
+
93.90
+
-5.93%
+
+
88.33
+
93.90
+
-5.93%
+
cached 75% +
132.00
+
145.00
+
-8.97%
+
+
132.00
+
145.00
+
-8.97%
+
cached max +
160.00
+
188.00
+
-14.89%
+
+
160.00
+
188.00
+
-14.89%
+
used mean +
2,060.73
+
834.00
+
147.09%
+
+
2,060.73
+
834.00
+
147.09%
+
used 75% +
2,343.00
+
852.00
+
175.00%
+
+
2,343.00
+
852.00
+
175.00%
+
used max +
2,346.00
+
859.00
+
173.11%
+
+
2,346.00
+
859.00
+
173.11%
+
rx MB/s 75% +
0.00
+
0.00
+
nan%
+
+
0.00
+
0.00
+
nan%
+
rx MB/s 95% +
0.00
+
0.00
+
nan%
+
+
0.00
+
0.00
+
nan%
+
rx MB/s 99% +
0.00
+
0.00
+
nan%
+
+
0.00
+
0.00
+
nan%
+
pgin mean +
37.37
+
190.21
+
-80.35%
+
+
37.37
+
190.21
+
-80.35%
+
pgin 75% +
59.00
+
412.00
+
-85.68%
+
+
59.00
+
412.00
+
-85.68%
+
pgin max +
352.00
+
509.00
+
-30.84%
+
+
352.00
+
509.00
+
-30.84%
+
pgout mean +
0.13
+
40.97
+
-99.68%
+
+
0.13
+
40.97
+
-99.68%
+
pgout 75% +
0.00
+
1.00
+
-100.00%
+
+
0.00
+
1.00
+
-100.00%
+
pgout max +
2.00
+
840.00
+
-99.76%
+
+
2.00
+
840.00
+
-99.76%
+
fault mean +
952,586.87
+
117,653.31
+
709.66%
+
+
952,586.87
+
117,653.31
+
709.66%
+
fault 75% +
1,426,717.00
+
205,151.00
+
595.45%
+
+
1,426,717.00
+
205,151.00
+
595.45%
+
fault max +
2,628,392.00
+
256,538.00
+
924.56%
+
+
2,628,392.00
+
256,538.00
+
924.56%
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
clientsr217agg
cpu%_avg +
0.88
+
0.45
+
96.45%
+
+
0.88
+
0.45
+
96.45%
+
cpu freq_avg +
3,460.22
+
3,241.92
+
6.73%
+
+
3,460.22
+
3,241.92
+
6.73%
+
pathlength_sum +
17,960.00
+
1,933.00
+
829.13%
+
+
17,960.00
+
1,933.00
+
829.13%
+
ipc_avg +
1.27
+
1.14
+
11.30%
+
+
1.27
+
1.14
+
11.30%
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 indexStage IDJob IDreal_queryidqueryidtotal_timestdev_timeacc_totaltotal
008818127.981.9199.65%99.65%
11109180.29nan99.87%0.23%
221210180.09nan99.94%0.07%
331511180.07nan100.00%0.06%
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 indexStage IDJob IDreal_queryidqueryidtotal_timestdev_timeacc_totaltotal
00881813.860.3286.65%86.65%
111210180.98nan92.80%6.15%
22109180.74nan97.43%4.63%
331511180.41nan100.00%2.57%
+
+
+
+
+No description has been provided for this image +
+
+
+
+No description has been provided for this image +
+
+
+
+No description has been provided for this image +
+
+
+
+
+
+
+

Config compare

+
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
0851_00480851_0029comp
callSite.shortcollect at /tmp/ipykernel_265482/1936321720.py:117collect at /tmp/ipykernel_234307/1936321720.py:117False
spark.app.submitTime17334643016691733457038427False
spark.executor.extraClassPathfile:///data0/home/sparkuser/jars/6600a164407ae0e4f5ea5b33dc4b902f23a27730/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-snapshot.jarFalse
spark.executor.extraJavaOptions-xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/home/sparkuser/logs/java/hs_err_pid%p.log-xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/data0/home/sparkuser/logs/java/hs_err_pid%p.logFalse
spark.executor.memory10944m29184mFalse
spark.gluten.memory.conservative.task.offHeap.size.in.bytes10041163776NaNFalse
spark.gluten.memory.dynamic.offHeap.sizing.enabledfalseNaNFalse
spark.gluten.memory.offHeap.size.in.bytes80329310208NaNFalse
spark.gluten.memory.overAcquiredMemoryRatio0NaNFalse
spark.gluten.memory.task.offHeap.size.in.bytes20082327552NaNFalse
spark.gluten.memoryOverhead.size.in.bytes1073741824NaNFalse
spark.gluten.numTaskSlotsPerExecutor4NaNFalse
spark.gluten.sql.columnar.backend.libveloxNaNFalse
spark.gluten.sql.columnar.coalesce.batchestrueNaNFalse
spark.gluten.sql.columnar.forceshuffledhashjointrueNaNFalse
spark.gluten.sql.columnar.maxBatchSize4096NaNFalse
spark.gluten.sql.columnar.shuffle.codeclz4NaNFalse
spark.gluten.sql.columnar.shuffle.codecBackendNaNFalse
spark.gluten.sql.session.timeZone.defaultetc/utcNaNFalse
spark.memory.offHeap.size8032931020858368mFalse
spark.pluginsorg.apache.gluten.glutenpluginNaNFalse
spark.repl.class.outputDir/tmp/tmpypqh85b0/tmp/tmpynceqaxdFalse
spark.repl.class.urispark://sr213:40521/classesspark://sr213:34951/classesFalse
spark.shuffle.managerorg.apache.spark.shuffle.sort.columnarshufflemanagerNaNFalse
spark.sql.adaptive.customCostEvaluatorClassorg.apache.spark.sql.execution.adaptive.glutencostevaluatorNaNFalse
spark.sql.extensionsorg.apache.gluten.extension.glutensessionextensionsNaNFalse
spark.sql.files.maxPartitionBytes4gNaNFalse
spark.sql.shuffle.partitions3264False
+
+
+
+
+
+
+
+

Convert to HTML

+
+
+
+
+
+
+
+
+ +
+
+
+
+
+
+
+
+
+
+
+
+ + diff --git a/tools/workload/benchmark_velox/sample/tpch_q1.nbconvert.ipynb b/tools/workload/benchmark_velox/sample/tpch_q1.nbconvert.ipynb new file mode 100644 index 000000000000..122f1c3c7dec --- /dev/null +++ b/tools/workload/benchmark_velox/sample/tpch_q1.nbconvert.ipynb @@ -0,0 +1,4984 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4371325d", + "metadata": { + "papermill": { + "duration": 0.003812, + "end_time": "2024-12-06T05:53:34.206544", + "exception": false, + "start_time": "2024-12-06T05:53:34.202732", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c61021c1", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:53:34.214595Z", + "iopub.status.busy": "2024-12-06T05:53:34.214315Z", + "iopub.status.idle": "2024-12-06T05:53:34.220494Z", + "shell.execute_reply": "2024-12-06T05:53:34.220105Z" + }, + "papermill": { + "duration": 0.011601, + "end_time": "2024-12-06T05:53:34.221688", + "exception": false, + "start_time": "2024-12-06T05:53:34.210087", + "status": "completed" + }, + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "appid=''\n", + "disk=''\n", + "nic=''\n", + "tz=''\n", + "base_dir=''\n", + "name=''\n", + "proxy=''\n", + "\n", + "comp_appid=''\n", + "comp_base_dir=''\n", + "comp_name=''" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f4c7b29a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:53:34.229763Z", + "iopub.status.busy": "2024-12-06T05:53:34.229436Z", + "iopub.status.idle": "2024-12-06T05:53:34.232162Z", + "shell.execute_reply": "2024-12-06T05:53:34.231769Z" + }, + "papermill": { + "duration": 0.008176, + "end_time": "2024-12-06T05:53:34.233387", + "exception": false, + "start_time": "2024-12-06T05:53:34.225211", + "status": "completed" + }, + "tags": [ + "injected-parameters" + ] + }, + "outputs": [], + "source": [ + "# Parameters\n", + "appid 
= \"application_1733153225851_0048\"\n", + "disk = \"nvme0n1\"\n", + "nic = \"enp61s0f0\"\n", + "tz = \"Etc/GMT+0\"\n", + "base_dir = \"sr213\"\n", + "name = \"tpch_gluten\"\n", + "comp_appid = \"application_1733153225851_0029\"\n", + "comp_base_dir = \"sr213\"\n", + "comp_name = \"vanilla\"\n", + "proxy = \"http://10.239.44.250:8080\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "51887dbb", + "metadata": { + "papermill": { + "duration": 0.003585, + "end_time": "2024-12-06T05:53:34.240616", + "exception": false, + "start_time": "2024-12-06T05:53:34.237031", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# start analysis cluster and run" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "11b3e5f6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:53:34.248767Z", + "iopub.status.busy": "2024-12-06T05:53:34.248529Z", + "iopub.status.idle": "2024-12-06T05:53:34.251294Z", + "shell.execute_reply": "2024-12-06T05:53:34.250897Z" + }, + "papermill": { + "duration": 0.008331, + "end_time": "2024-12-06T05:53:34.252497", + "exception": false, + "start_time": "2024-12-06T05:53:34.244166", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import findspark\n", + "findspark.init()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "58fa24f6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:53:34.260508Z", + "iopub.status.busy": "2024-12-06T05:53:34.260287Z", + "iopub.status.idle": "2024-12-06T05:53:34.263142Z", + "shell.execute_reply": "2024-12-06T05:53:34.262754Z" + }, + "papermill": { + "duration": 0.008226, + "end_time": "2024-12-06T05:53:34.264308", + "exception": false, + "start_time": "2024-12-06T05:53:34.256082", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "def get_py4jzip():\n", + " spark_home=os.environ['SPARK_HOME']\n", + " py4jzip = !ls {spark_home}/python/lib/py4j*.zip\n", + " 
return py4jzip[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6608ae2f", + "metadata": { + "code_folding": [], + "execution": { + "iopub.execute_input": "2024-12-06T05:53:34.272550Z", + "iopub.status.busy": "2024-12-06T05:53:34.272222Z", + "iopub.status.idle": "2024-12-06T05:54:05.226922Z", + "shell.execute_reply": "2024-12-06T05:54:05.226384Z" + }, + "papermill": { + "duration": 30.960697, + "end_time": "2024-12-06T05:54:05.228547", + "exception": false, + "start_time": "2024-12-06T05:53:34.267850", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/12/06 05:53:36 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "24/12/06 05:53:37 WARN DomainSocketFactory: The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.\n", + "24/12/06 05:53:37 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sparkuser/spark/python/pyspark/sql/context.py:112: FutureWarning: Deprecated in 3.0.0. 
Use SparkSession.builder.getOrCreate() instead.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from pyspark import SparkConf, SparkContext\n", + "from pyspark.sql import SQLContext\n", + "import time\n", + "import sys\n", + "conf = (SparkConf()\n", + " .set('spark.app.name', f'perf_analysis_{appid}')\n", + " .set('spark.serializer','org.apache.spark.serializer.KryoSerializer')\n", + " .set('spark.executor.instances', '4')\n", + " .set('spark.executor.cores','4')\n", + " .set('spark.executor.memory', '8g')\n", + " .set('spark.driver.memory','20g')\n", + " .set('spark.memory.offHeap.enabled','True')\n", + " .set('spark.memory.offHeap.size','20g')\n", + " .set('spark.executor.memoryOverhead','1g')\n", + " .set('spark.executor.extraJavaOptions',\n", + " '-XX:+UseParallelGC -XX:+UseParallelOldGC -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps')\n", + " .set('spark.executorEnv.PYTHONPATH',f\"{os.environ['SPARK_HOME']}/python:{get_py4jzip()}:{':'.join(sys.path)}\")\n", + " .set('spark.sql.inMemoryColumnarStorage.compressed','False')\n", + " .set('spark.sql.inMemoryColumnarStorage.batchSize','100000')\n", + " .set('spark.sql.execution.arrow.pyspark.fallback.enabled','True')\n", + " .set('spark.sql.execution.arrow.pyspark.enabled','True')\n", + " .set('spark.sql.execution.arrow.maxRecordsPerBatch','100000')\n", + " .set(\"spark.sql.repl.eagerEval.enabled\", True)\n", + " .set(\"spark.sql.legacy.timeParserPolicy\",\"LEGACY\") \n", + " .set(\"spark.sql.session.timeZone\", tz)\n", + " )\n", + "\n", + "sc = SparkContext(conf=conf,master='yarn')\n", + "sc.setLogLevel(\"ERROR\")\n", + "spark = SQLContext(sc)\n", + "time.sleep(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "beaceea2", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:05.237940Z", + "iopub.status.busy": "2024-12-06T05:54:05.237576Z", + "iopub.status.idle": "2024-12-06T05:54:05.243620Z", + "shell.execute_reply": "2024-12-06T05:54:05.243229Z" + }, + 
"papermill": { + "duration": 0.01213, + "end_time": "2024-12-06T05:54:05.244853", + "exception": false, + "start_time": "2024-12-06T05:54:05.232723", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "96ff6bfd", + "metadata": { + "papermill": { + "duration": 0.004098, + "end_time": "2024-12-06T05:54:05.253289", + "exception": false, + "start_time": "2024-12-06T05:54:05.249191", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Sparklog" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96db8a10", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:05.287175Z", + "iopub.status.busy": "2024-12-06T05:54:05.286902Z", + "iopub.status.idle": "2024-12-06T05:54:07.652568Z", + "shell.execute_reply": "2024-12-06T05:54:07.652028Z" + }, + "papermill": { + "duration": 2.397178, + "end_time": "2024-12-06T05:54:07.654334", + "exception": false, + "start_time": "2024-12-06T05:54:05.257156", + "status": "completed" + }, + "scrolled": false, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%run ~/PAUS/sparklog.ipynb" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f2087dbe", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.664430Z", + "iopub.status.busy": "2024-12-06T05:54:07.664039Z", + "iopub.status.idle": "2024-12-06T05:54:07.666809Z", + "shell.execute_reply": "2024-12-06T05:54:07.666368Z" + }, + "papermill": { + "duration": 0.009062, + "end_time": "2024-12-06T05:54:07.668029", + 
"exception": false, + "start_time": "2024-12-06T05:54:07.658967", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "os.environ[\"https_proxy\"] = proxy\n", + "os.environ[\"http_proxy\"] = proxy" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "df22c6c4", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.677489Z", + "iopub.status.busy": "2024-12-06T05:54:07.677198Z", + "iopub.status.idle": "2024-12-06T05:54:07.679618Z", + "shell.execute_reply": "2024-12-06T05:54:07.679199Z" + }, + "papermill": { + "duration": 0.008559, + "end_time": "2024-12-06T05:54:07.680791", + "exception": false, + "start_time": "2024-12-06T05:54:07.672232", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "emonmetric=['emon_cpuutil',\n", + " 'emon_cpufreq',\n", + " 'emon_instr_retired',\n", + " 'emon_ipc']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "44921944", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.690327Z", + "iopub.status.busy": "2024-12-06T05:54:07.689959Z", + "iopub.status.idle": "2024-12-06T05:54:07.692308Z", + "shell.execute_reply": "2024-12-06T05:54:07.691898Z" + }, + "papermill": { + "duration": 0.008606, + "end_time": "2024-12-06T05:54:07.693519", + "exception": false, + "start_time": "2024-12-06T05:54:07.684913", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "e3b53125", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.702902Z", + "iopub.status.busy": "2024-12-06T05:54:07.702567Z", + "iopub.status.idle": "2024-12-06T05:54:07.705136Z", + "shell.execute_reply": "2024-12-06T05:54:07.704728Z" + }, + "papermill": { + "duration": 0.008666, + "end_time": "2024-12-06T05:54:07.706366", + "exception": false, + "start_time": 
"2024-12-06T05:54:07.697700", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "disk_prefix=[f\"'{dev}'\" for dev in disk.split(',')]\n", + "nic_prefix=[f\"'{dev}'\" for dev in nic.split(',')]" + ] + }, + { + "cell_type": "markdown", + "id": "04d5c054", + "metadata": { + "papermill": { + "duration": 0.00437, + "end_time": "2024-12-06T05:54:07.715026", + "exception": false, + "start_time": "2024-12-06T05:54:07.710656", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Content" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "004663b7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.725014Z", + "iopub.status.busy": "2024-12-06T05:54:07.724613Z", + "iopub.status.idle": "2024-12-06T05:54:07.729656Z", + "shell.execute_reply": "2024-12-06T05:54:07.729270Z" + }, + "papermill": { + "duration": 0.011505, + "end_time": "2024-12-06T05:54:07.730819", + "exception": false, + "start_time": "2024-12-06T05:54:07.719314", + "status": "completed" + }, + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + " 5 Self app info" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " 6 Compare to vanilla" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " 7 Config compare" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display(HTML(' 5 Self app info'))\n", + "display(HTML(f\" 6 Compare to {comp_name}\"))\n", + "display(HTML(' 7 Config compare'))" + ] + }, + { + "cell_type": "markdown", + "id": "64cbb5ba", + "metadata": { + "papermill": { + "duration": 0.004589, + "end_time": "2024-12-06T05:54:07.739936", + "exception": false, + "start_time": "2024-12-06T05:54:07.735347", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Self app 
info" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0c621763", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.749952Z", + "iopub.status.busy": "2024-12-06T05:54:07.749546Z", + "iopub.status.idle": "2024-12-06T05:54:07.832927Z", + "shell.execute_reply": "2024-12-06T05:54:07.832513Z" + }, + "papermill": { + "duration": 0.090016, + "end_time": "2024-12-06T05:54:07.834415", + "exception": false, + "start_time": "2024-12-06T05:54:07.744399", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "app=Application_Run(appid, basedir=base_dir)\n", + "appals=app.analysis['app']['als']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "0195d322", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:07.844603Z", + "iopub.status.busy": "2024-12-06T05:54:07.844367Z", + "iopub.status.idle": "2024-12-06T05:54:33.914699Z", + "shell.execute_reply": "2024-12-06T05:54:33.914257Z" + }, + "papermill": { + "duration": 26.07688, + "end_time": "2024-12-06T05:54:33.916079", + "exception": false, + "start_time": "2024-12-06T05:54:07.839199", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "load data /sr213/application_1733153225851_0048/app.log\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 0:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 1:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 4:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "\r", + "[Stage 5:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 17:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 39:> (0 + 16) / 200]\r", + "\r", + "[Stage 39:=> (6 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 39:===> (14 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 39:========> (33 + 16) / 200]\r", + "\r", + "[Stage 39:==================> (67 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 39:==========================> (97 + 16) / 200]\r", + "\r", + "[Stage 39:==================================> (131 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 39:=================================================> (185 + 15) / 200]\r", + "\r", + " \r" + ] + }, + { + "data": { + "text/html": [ + "http://sr213:18080/history/application_1733153225851_0048" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 42:(177 + 5) / 200][Stage 43:> (0 + 1) / 1][Stage 44:>(0 + 11) / 200]\r", + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 44:(113 + 12) / 200][Stage 45:> (0 + 1) / 1][Stage 46:> (0 + 3) / 200]\r", + "\r", + "[Stage 44:(182 + 5) / 200][Stage 46:>(4 + 11) / 200][Stage 47:> (0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 46:(43 + 16) / 200][Stage 47:> (0 + 0) / 200][Stage 48:> (0 + 0) / 200]\r", + "\r", + 
"[Stage 46:(110 + 8) / 200][Stage 47:> (0 + 8) / 200][Stage 48:> (0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 46:(155 + 8) / 200][Stage 47:>(47 + 8) / 200][Stage 48:> (0 + 0) / 200]\r", + "\r", + "[Stage 46:(194 + 4) / 200][Stage 47:>(73 + 4) / 200][Stage 48:> (8 + 8) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 47:(114 + 8) / 200][Stage 48:>(57 + 4) / 200][Stage 49:> (0 + 4) / 16]\r", + "\r", + "[Stage 47:(185 + 4) / 200][Stage 48:>(73 + 4) / 200][Stage 49:> (0 + 8) / 16]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 48:(126 + 8) / 200][Stage 49:> (0 + 8) / 16][Stage 51:> (0 + 0) / 1]\r", + "\r", + "[Stage 48:(184 + 4) / 200][Stage 49:> (4 + 12) / 16][Stage 51:> (0 + 0) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
appidapplication_1733153225851_0048
executor.instances4
executor.cores4
shuffle.partitions32
batch size4,096
real executors4
Failed Tasks
Speculative Tasks0
Speculative Killed Tasks0
Speculative Stage0
runtime17.65
disk spilled0.0
memspilled0.0
local_read0.0
remote_read0.0
shuffle_write0.0
task run time6.79
ser_time0.0
f_wait_time0.0
gc_time0.03
input read22.54
acc_task_time13.99
file read size5,951.35
file write size24.52
disk read size5.05
disk write size15.31
disk cancel size0.0
\n", + "\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "{'appid': 'application_1733153225851_0048',\n", + " 'executor.instances': 4,\n", + " 'executor.cores': 4,\n", + " 'shuffle.partitions': 32,\n", + " 'batch size': 4096,\n", + " 'real executors': 4,\n", + " 'Failed Tasks': '',\n", + " 'Speculative Tasks': 0,\n", + " 'Speculative Killed Tasks': 0,\n", + " 'Speculative Stage': 0,\n", + " 'runtime': 17.65,\n", + " 'disk spilled': 0.0,\n", + " 'memspilled': 0.0,\n", + " 'local_read': 0.0,\n", + " 'remote_read': 0.0,\n", + " 'shuffle_write': 0.0,\n", + " 'task run time': 6.79,\n", + " 'ser_time': 0.0,\n", + " 'f_wait_time': 0.0,\n", + " 'gc_time': 0.03,\n", + " 'input read': 22.54,\n", + " 'acc_task_time': 13.99,\n", + " 'file read size': 5951.35,\n", + " 'file write size': 24.52,\n", + " 'disk read size': 5.05,\n", + " 'disk write size': 15.31,\n", + " 'disk cancel size': 0.0}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appals.get_basic_state()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4be7e21a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:54:33.929243Z", + "iopub.status.busy": "2024-12-06T05:54:33.928978Z", + "iopub.status.idle": "2024-12-06T05:55:12.939373Z", + "shell.execute_reply": "2024-12-06T05:55:12.938897Z" + }, + "papermill": { + "duration": 39.018383, + "end_time": "2024-12-06T05:55:12.940731", + "exception": false, + "start_time": "2024-12-06T05:54:33.922348", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 92:(161 + 4) / 200][Stage 93:>(68 + 9) / 200][Stage 94:> (8 + 4) / 200]\r", + "\r", + "[Stage 93:(151 + 4) / 200][Stage 94:>(66 + 8) / 200][Stage 95:> (1 + 4) / 16]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"\r", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/sr213/application_1733153225851_0048/sr217/emon.parquet is not found, trying to load data ...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 129:> (0 + 2) / 2][Stage 130:> (0 + 2) / 2]\r", + "\r", + "[Stage 129:> (0 + 2) / 2][Stage 130:> (0 + 2) / 2][Stage 131:> (0 + 4) / 4]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 129:=> (1 + 1) / 2][Stage 130:=> (1 + 1) / 2][Stage 131:> (1 + 3) / 4]\r", + "\r", + "[Stage 129:========> (1 + 1) / 2][Stage 131:====> (1 + 3) / 4]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==> (16 + 16) / 400]\r", + "\r", + "[Stage 143:==> (17 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:===> (24 + 16) / 400]\r", + "\r", + "[Stage 143:====> (34 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=====> (43 + 16) / 400]\r", + "\r", + "[Stage 143:=======> (53 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:========> (65 + 16) / 400]\r", + "\r", + "[Stage 143:=========> (72 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==========> (83 + 16) / 400]\r", + "\r", + "[Stage 143:===========> (90 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=============> (99 + 16) / 400]\r", + "\r", + "[Stage 143:=============> (106 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==============> (113 + 16) / 400]\r", + "\r", + "[Stage 143:===============> (118 + 16) / 400]\r" + ] + }, 
+ { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:================> (126 + 16) / 400]\r", + "\r", + "[Stage 143:=================> (132 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==================> (140 + 16) / 400]\r", + "\r", + "[Stage 143:==================> (146 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:===================> (153 + 16) / 400]\r", + "\r", + "[Stage 143:====================> (160 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:======================> (170 + 16) / 400]\r", + "\r", + "[Stage 143:======================> (173 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=======================> (182 + 16) / 400]\r", + "\r", + "[Stage 143:========================> (186 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=========================> (194 + 16) / 400]\r", + "\r", + "[Stage 143:=========================> (197 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==========================> (204 + 16) / 400]\r", + "\r", + "[Stage 143:===========================> (211 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:===========================> (214 + 16) / 400]\r", + "\r", + "[Stage 143:============================> (222 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=============================> (230 + 16) / 400]\r", + "\r", + "[Stage 143:==============================> (236 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 
143:===============================> (243 + 16) / 400]\r", + "\r", + "[Stage 143:================================> (249 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:=================================> (256 + 16) / 400]\r", + "\r", + "[Stage 143:==================================> (263 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:===================================> (272 + 16) / 400]\r", + "\r", + "[Stage 143:====================================> (279 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:======================================> (294 + 16) / 400]\r", + "\r", + "[Stage 143:======================================> (299 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:========================================> (311 + 16) / 400]\r", + "\r", + "[Stage 143:=========================================> (322 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:===========================================> (333 + 17) / 400]\r", + "\r", + "[Stage 143:=============================================> (348 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==============================================> (360 + 16) / 400]\r", + "\r", + "[Stage 143:================================================> (372 + 16) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 143:==================================================> (386 + 14) / 400]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 148:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 154:> (0 + 3) / 3]\r" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 154:===================> (1 + 2) / 3]\r", + "\r", + "[Stage 154:======================================> (2 + 1) / 3]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 157:> (0 + 3) / 3]\r", + "\r", + "[Stage 157:===================> (1 + 2) / 3]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 157:======================================> (2 + 1) / 3]\r", + "\r", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sar metric\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 application_1733153225851_0048
runtime17.650000
disk spilled0.000000
shuffle_write0.000000
f_wait_time0.000000
input read22.540000
acc_task_time13.990000
output rows1.180000
%user>90%0.931034
%kernel>10%0.965517
%iowait>10%0.620690
avg %user41.216207
avg %system4.514138
avg %iowait0.743793
avg disk util32.206897
time more than 90%0.000000
total read (G)5.388613
total write (G)1.121773
avg read bw (MB/s)190.273771
avg write bw (MB/s)39.610183
read bw %75411.578125
read bw %95484.542969
read bw max510.351562
time_rd_morethan_950.034483
write bw %751.074219
write bw %95165.687500
write bw max812.511719
time_wr_morethan_950.034483
cached mean93.896552
cached 75%145.000000
cached max188.000000
used mean834.000000
used 75%852.000000
used max859.000000
rx MB/s 75%0.000000
rx MB/s 95%0.000000
rx MB/s 99%0.000000
pgin mean190.206897
pgin 75%412.000000
pgin max509.000000
pgout mean40.965517
pgout 75%1.000000
pgout max840.000000
fault mean117653.310345
fault 75%205151.000000
fault max256538.000000
cpu%_avg0.448817
cpu freq_avg3241.915617
pathlength_sum1933.000000
ipc_avg1.137983
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "summary=app.get_summary(show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n", + "display(summary.style)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "ae213d2c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:55:12.957566Z", + "iopub.status.busy": "2024-12-06T05:55:12.957206Z", + "iopub.status.idle": "2024-12-06T05:55:33.180803Z", + "shell.execute_reply": "2024-12-06T05:55:33.180353Z" + }, + "papermill": { + "duration": 20.233292, + "end_time": "2024-12-06T05:55:33.182105", + "exception": false, + "start_time": "2024-12-06T05:55:12.948813", + "status": "completed" + }, + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 330:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 341:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DEV in ('nvme0n1')\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 388:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 396:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "data": { + "text/plain": [ + "{'sr217': 200}" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 490:===================> (1 + 2) / 3]\r", + "\r", + " \r" + ] + }, + { + "data": { + 
"text/html": [ + "http://sr213:1088/tracing_examples/trace_viewer.html#/tracing/test_data/application_1733153225851_0048.json" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "app.generate_trace_view(showemon=True,show_metric=emonmetric,disk_prefix=disk_prefix,nic_prefix=nic_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "adde42f3", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:55:33.200290Z", + "iopub.status.busy": "2024-12-06T05:55:33.199984Z", + "iopub.status.idle": "2024-12-06T05:55:33.930601Z", + "shell.execute_reply": "2024-12-06T05:55:33.930143Z" + }, + "papermill": { + "duration": 0.741184, + "end_time": "2024-12-06T05:55:33.931895", + "exception": false, + "start_time": "2024-12-06T05:55:33.190711", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "gluten tpch_power 6600a1" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "appals.get_app_name()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b20c9ef4", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:55:33.949596Z", + "iopub.status.busy": "2024-12-06T05:55:33.949323Z", + "iopub.status.idle": "2024-12-06T05:56:11.412960Z", + "shell.execute_reply": "2024-12-06T05:56:11.412450Z" + }, + "papermill": { + "duration": 37.473642, + "end_time": "2024-12-06T05:56:11.414122", + "exception": false, + "start_time": "2024-12-06T05:55:33.940480", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 605:==> (11 + 0) / 200]\r", + "\r", + "[Stage 531:(174 + 16) / 200][Stage 532:>(0 + 0) / 200][Stage 533:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 532:(102 + 16) / 200][Stage 533:>(0 + 0) / 
200][Stage 534:>(0 + 0) / 200]\r", + "\r", + "[Stage 533:(72 + 16) / 200][Stage 534:>(0 + 0) / 200][Stage 535:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 534:(63 + 16) / 200][Stage 535:>(0 + 0) / 200][Stage 536:>(0 + 0) / 200]\r", + "\r", + "[Stage 535:(75 + 16) / 200][Stage 536:>(0 + 0) / 200][Stage 537:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 536:(102 + 17) / 200][Stage 537:>(0 + 0) / 200][Stage 538:>(3 + 0) / 200]\r", + "\r", + "[Stage 537:(114 + 16) / 200][Stage 538:>(3 + 0) / 200][Stage 539:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 538:(105 + 16) / 200][Stage 539:>(0 + 0) / 200][Stage 540:>(0 + 0) / 200]\r", + "\r", + "[Stage 539:(67 + 16) / 200][Stage 540:>(0 + 0) / 200][Stage 541:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 540:(59 + 16) / 200][Stage 541:>(0 + 0) / 200][Stage 542:>(0 + 0) / 200]\r", + "\r", + "[Stage 541:(104 + 16) / 200][Stage 542:>(0 + 0) / 200][Stage 543:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 542:(115 + 16) / 200][Stage 543:>(0 + 0) / 200][Stage 544:>(0 + 0) / 200]\r", + "\r", + "[Stage 543:(148 + 16) / 200][Stage 544:>(0 + 0) / 200][Stage 545:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 545:(0 + 16) / 200][Stage 546:>(0 + 0) / 200][Stage 547:>(0 + 0) / 200]\r", + "\r", + "[Stage 546:(38 + 16) / 200][Stage 547:>(0 + 0) / 200][Stage 548:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 547:(58 + 17) / 200][Stage 548:>(0 + 0) / 200][Stage 549:>(0 + 0) / 200]\r", + "\r", + "[Stage 548:(94 + 16) / 200][Stage 549:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "\r", + "[Stage 549:(113 + 17) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]\r", + "\r", + "[Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 551:(16 + 1) / 200][Stage 553:>(0 + 0) / 200][Stage 555:>(0 + 0) / 200]\r", + "\r", + "[Stage 551:(179 + 16) / 200][Stage 553:>(0 + 0) / 200][Stage 555:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 553:(16 + 0) / 200][Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200]\r", + "\r", + "[Stage 553:(54 + 17) / 200][Stage 555:>(0 + 0) / 200][Stage 557:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 555:>(8 + 8) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]\r", + "\r", + "[Stage 555:(16 + 0) / 200][Stage 557:>(0 + 0) / 200][Stage 559:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 555:(199 + 1) / 200][Stage 557:(0 + 15) / 200][Stage 559:>(0 + 0) / 200]\r", + "\r", + "[Stage 557:(16 + 0) / 200][Stage 559:>(0 + 0) / 200][Stage 560:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 557:(84 + 16) / 200][Stage 559:>(0 + 0) / 200][Stage 560:>(0 + 0) / 200]\r", + "\r", + "[Stage 559:(16 + 0) / 200][Stage 560:>(0 + 0) / 200][Stage 564:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 559:(29 + 16) / 200][Stage 560:>(0 + 0) / 200][Stage 564:>(0 + 0) / 200]\r", + "\r", + "[Stage 560:(85 + 16) / 200][Stage 564:>(0 + 0) / 200][Stage 566:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 564:>(0 + 0) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "\r", + "[Stage 564:(16 + 0) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 564:(149 + 16) / 200][Stage 566:>(0 + 0) / 200][Stage 568:>(0 + 0) / 200]\r", + "\r", + "[Stage 566:(16 + 0) / 200][Stage 568:>(0 + 0) / 200][Stage 569:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 566:(71 + 16) / 200][Stage 568:>(0 + 0) / 200][Stage 569:>(0 + 0) / 200]\r", + "\r", + "[Stage 568:(16 + 0) / 200][Stage 569:>(0 + 0) / 200][Stage 573:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 568:(16 + 2) / 200][Stage 569:>(0 + 0) / 200][Stage 573:>(0 + 0) / 200]\r", + "\r", + "[Stage 569:(42 + 18) / 200][Stage 573:>(0 + 0) / 200][Stage 575:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 573:>(0 + 0) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 573:(0 + 16) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]\r", + "\r", + "[Stage 573:(16 + 0) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 573:(67 + 16) / 200][Stage 575:>(0 + 0) / 200][Stage 577:>(0 + 0) / 200]\r", + "\r", + "[Stage 575:(16 + 0) / 200][Stage 577:>(0 + 0) / 200][Stage 578:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 575:(144 + 16) / 200][Stage 577:>(0 + 0) / 200][Stage 578:>(0 + 0) / 200]\r", + "\r", + "[Stage 577:(16 + 0) / 200][Stage 578:>(0 + 0) / 200][Stage 581:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 577:(184 + 16) / 200][Stage 578:>(0 + 0) / 200][Stage 
581:>(0 + 0) / 200]\r", + "\r", + "[Stage 578:(197 + 3) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]\r", + "\r", + "[Stage 580:(58 + 16) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 580:(176 + 17) / 200][Stage 581:>(0 + 0) / 200][Stage 585:>(0 + 0) / 200]\r", + "\r", + "[Stage 581:(195 + 5) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]\r", + "\r", + "[Stage 583:(69 + 17) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 583:(170 + 16) / 200][Stage 585:>(0 + 0) / 200][Stage 587:>(0 + 0) / 200]\r", + "\r", + "[Stage 585:(16 + 0) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 585:(75 + 16) / 200][Stage 587:>(0 + 0) / 200][Stage 589:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 587:(16 + 0) / 200][Stage 589:>(0 + 0) / 200][Stage 591:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 587:(16 + 6) / 200][Stage 589:>(0 + 0) / 200][Stage 591:>(0 + 0) / 200]\r", + "\r", + "[Stage 589:>(8 + 8) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 589:(16 + 0) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + 
"[Stage 589:(182 + 17) / 200][Stage 591:>(0 + 0) / 200][Stage 593:>(0 + 0) / 200]\r", + "\r", + "[Stage 591:(16 + 0) / 200][Stage 593:>(0 + 0) / 200][Stage 595:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 591:(133 + 17) / 200][Stage 593:>(0 + 0) / 200][Stage 595:>(0 + 0) / 200]\r", + "\r", + "[Stage 593:(16 + 0) / 200][Stage 595:>(0 + 0) / 200][Stage 597:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 593:(39 + 18) / 200][Stage 595:>(0 + 0) / 200][Stage 597:>(0 + 0) / 200]\r", + "\r", + "[Stage 595:(16 + 0) / 200][Stage 597:>(0 + 0) / 200][Stage 599:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 597:(16 + 0) / 200][Stage 599:>(0 + 0) / 200][Stage 600:>(0 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 599:(0 + 16) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]\r", + "\r", + "[Stage 599:(16 + 0) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 599:(169 + 16) / 200][Stage 600:>(0 + 0) / 200][Stage 605:(11 + 0) / 200]\r", + "\r", + "[Stage 605:==> (11 + 0) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 602:======> (102 + 18) / 200][Stage 605:> (11 + 0) / 200]\r", + "\r", + "[Stage 605:==> (11 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 606:============================================> (170 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 607:===================================> (135 + 16) / 200]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + " \r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "\r", + "[Stage 932:====================================================>(197 + 3) / 200]\r", + "\r", + " \r" + ] + } + ], + "source": [ + "if not 'vanilla' in name:\n", + " shuffle_df, dfx=appals.get_shuffle_stat()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "110d231a", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:11.436348Z", + "iopub.status.busy": "2024-12-06T05:56:11.436046Z", + "iopub.status.idle": "2024-12-06T05:56:19.512226Z", + "shell.execute_reply": "2024-12-06T05:56:19.511740Z" + }, + "papermill": { + "duration": 8.089268, + "end_time": "2024-12-06T05:56:19.514108", + "exception": false, + "start_time": "2024-12-06T05:56:11.424840", + "status": "completed" + }, + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + " application_1733153225851_0048 " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "http://sr213:18080/history/application_1733153225851_0048" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " query time " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + "
 runtimedisk spilledmemspilledlocal_readremote_readshuffle_writedeser_timerun_timeser_timef_wait_timegc_timepeak_memqueryidinput readacc_task_timestagesoutput rowsexecutorscore/exectask.cpusparallelism
real_queryid                     
117.6500000.0000000.0000000.0000000.0000000.0000000.2000006.7900000.0000000.0000000.0300001.340000822.54000013.990000[ 8 10 12 15]1.18000044132
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " operator count " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 01
AQEShuffleRead02
AdaptiveSparkPlan01
ColumnarExchange02
FilterExecTransformer01
FlushableHashAggregateExecTransformer01
InputAdapter02
InputIteratorTransformer02
ProjectExecTransformer02
RegularHashAggregateExecTransformer01
Scan parquet 01
ShuffleQueryStage02
SortExecTransformer01
VeloxColumnarToRow01
VeloxResizeBatches02
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " operator input row count " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 1
ColumnarExchange0.000000
VeloxResizeBatches0.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + " operator output row count " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 1
ColumnarExchange0.000000
FlushableHashAggregateExecTransformer0.000000
InputIteratorTransformer0.000000
ProjectExecTransformer591.600000
RegularHashAggregateExecTransformer0.000000
Scan parquet 591.600000
SortExecTransformer0.000000
VeloxColumnarToRow0.000000
VeloxResizeBatches0.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "appals.get_app_info(disk_prefix=disk_prefix,nic_prefix=nic_prefix)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "1263e7ae", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:19.542523Z", + "iopub.status.busy": "2024-12-06T05:56:19.542250Z", + "iopub.status.idle": "2024-12-06T05:56:21.623772Z", + "shell.execute_reply": "2024-12-06T05:56:21.623288Z" + }, + "papermill": { + "duration": 2.097246, + "end_time": "2024-12-06T05:56:21.625534", + "exception": false, + "start_time": "2024-12-06T05:56:19.528288", + "status": "completed" + }, + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
44%_time of scan and filter7.53
36%_time of project6.13
16%_not_counted2.69
3%_idle0.55
0%_time of input iterator0.06
0%_time of aggregation0.03
0%_time to append / split batches0.00
0%_time of rowConstruction0.00
0%_time to split0.00
0%_time to deserialize0.00
0%_time of sort0.00
0%_time of extraction0.00
0%_shuffle write time0.00
0%_time to convert0.00
0%_time to compress0.00
0%_time to spill0.00
0%_time to decompress0.00
\n", + "
" + ], + "text/plain": [ + " 0\n", + "44%_time of scan and filter 7.53\n", + "36%_time of project 6.13\n", + "16%_not_counted 2.69\n", + " 3%_idle 0.55\n", + " 0%_time of input iterator 0.06\n", + " 0%_time of aggregation 0.03\n", + " 0%_time to append / split batches 0.00\n", + " 0%_time of rowConstruction 0.00\n", + " 0%_time to split 0.00\n", + " 0%_time to deserialize 0.00\n", + " 0%_time of sort 0.00\n", + " 0%_time of extraction 0.00\n", + " 0%_shuffle write time 0.00\n", + " 0%_time to convert 0.00\n", + " 0%_time to compress 0.00\n", + " 0%_time to spill 0.00\n", + " 0%_time to decompress 0.00" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "appals.show_critical_path_time_breakdown().T" + ] + }, + { + "cell_type": "markdown", + "id": "94f75901", + "metadata": { + "papermill": { + "duration": 0.014921, + "end_time": "2024-12-06T05:56:21.656661", + "exception": false, + "start_time": "2024-12-06T05:56:21.641740", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Compare to vanilla" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "f9051ad7", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:21.687372Z", + "iopub.status.busy": "2024-12-06T05:56:21.687092Z", + "iopub.status.idle": "2024-12-06T05:56:57.099205Z", + "shell.execute_reply": "2024-12-06T05:56:57.098700Z" + }, + "papermill": { + "duration": 35.429191, + "end_time": "2024-12-06T05:56:57.100672", + "exception": false, + "start_time": "2024-12-06T05:56:21.671481", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "load data /sr213/application_1733153225851_0029/app.log\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "emon metric\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 1319:> (0 + 3) / 3]\r" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "[Stage 1319:==================> (1 + 2) / 3]\r", + "\r", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sar metric\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "time breakdown\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 application_1733153225851_0029application_1733153225851_0048diff
runtime132.1417.65648.67%
shuffle_write0.000.000.00%
f_wait_time0.000.000.00%
input read22.5422.540.00%
acc_task_time128.0113.99815.01%
output rows1.791.1851.69%
%user>90%0.990.935.91%
%kernel>10%0.990.972.85%
%iowait>10%0.310.62-49.30%
avg %user82.1141.2299.21%
avg %system6.104.5135.11%
avg %iowait0.170.74-76.60%
avg disk util7.1332.21-77.85%
time more than 90%0.000.000.00%
total read (G)5.245.39-2.75%
total write (G)0.021.12-97.81%
avg read bw (MB/s)37.52190.27-80.28%
avg write bw (MB/s)0.1839.61-99.55%
read bw %7559.27411.58-85.60%
read bw %95173.05484.54-64.29%
read bw max236.70510.35-53.62%
time_rd_morethan_950.050.0341.96%
write bw %750.071.07-93.45%
write bw %951.23165.69-99.25%
write bw max1.70812.51-99.79%
time_wr_morethan_950.000.03-100.00%
cached mean88.3393.90-5.93%
cached 75%132.00145.00-8.97%
cached max160.00188.00-14.89%
used mean2,060.73834.00147.09%
used 75%2,343.00852.00175.00%
used max2,346.00859.00173.11%
rx MB/s 75%0.000.000.00%
rx MB/s 95%0.000.000.00%
rx MB/s 99%0.000.000.00%
pgin mean37.37190.21-80.35%
pgin 75%59.00412.00-85.68%
pgin max352.00509.00-30.84%
pgout mean0.1340.97-99.68%
pgout 75%0.001.00-100.00%
pgout max2.00840.00-99.76%
fault mean952,586.87117,653.31709.66%
fault 75%1,426,717.00205,151.00595.45%
fault max2,628,392.00256,538.00924.56%
cpu%_avg0.880.4596.45%
cpu freq_avg3,460.223,241.926.73%
pathlength_sum17,960.001,933.00829.13%
ipc_avg1.271.1411.30%
\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 runtimeshuffle_writef_wait_timeinput readacc_task_timeoutput rows
real_queryid      
1\n", + "
132.14
\n", + "
17.65
\n", + "
648.67%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
22.54
\n", + "
22.54
\n", + "
0.00%
\n", + "
\n", + "
128.01
\n", + "
13.99
\n", + "
815.01%
\n", + "
\n", + "
1.79
\n", + "
1.18
\n", + "
51.69%
\n", + "
\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 sr217agg
0  
%user>90%\n", + "
0.99
\n", + "
0.93
\n", + "
5.91%
\n", + "
\n", + "
0.99
\n", + "
0.93
\n", + "
5.91%
\n", + "
%kernel>10%\n", + "
0.99
\n", + "
0.97
\n", + "
2.85%
\n", + "
\n", + "
0.99
\n", + "
0.97
\n", + "
2.85%
\n", + "
%iowait>10%\n", + "
0.31
\n", + "
0.62
\n", + "
-49.30%
\n", + "
\n", + "
0.31
\n", + "
0.62
\n", + "
-49.30%
\n", + "
avg %user\n", + "
82.11
\n", + "
41.22
\n", + "
99.21%
\n", + "
\n", + "
82.11
\n", + "
41.22
\n", + "
99.21%
\n", + "
avg %system\n", + "
6.10
\n", + "
4.51
\n", + "
35.11%
\n", + "
\n", + "
6.10
\n", + "
4.51
\n", + "
35.11%
\n", + "
avg %iowait\n", + "
0.17
\n", + "
0.74
\n", + "
-76.60%
\n", + "
\n", + "
0.17
\n", + "
0.74
\n", + "
-76.60%
\n", + "
avg disk util\n", + "
7.13
\n", + "
32.21
\n", + "
-77.85%
\n", + "
\n", + "
7.13
\n", + "
32.21
\n", + "
-77.85%
\n", + "
time more than 90%\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
total read (G)\n", + "
5.24
\n", + "
5.39
\n", + "
-2.75%
\n", + "
\n", + "
5.24
\n", + "
5.39
\n", + "
-2.75%
\n", + "
total write (G)\n", + "
0.02
\n", + "
1.12
\n", + "
-97.81%
\n", + "
\n", + "
0.02
\n", + "
1.12
\n", + "
-97.81%
\n", + "
avg read bw (MB/s)\n", + "
37.52
\n", + "
190.27
\n", + "
-80.28%
\n", + "
\n", + "
37.52
\n", + "
190.27
\n", + "
-80.28%
\n", + "
avg write bw (MB/s)\n", + "
0.18
\n", + "
39.61
\n", + "
-99.55%
\n", + "
\n", + "
0.18
\n", + "
39.61
\n", + "
-99.55%
\n", + "
read bw %75\n", + "
59.27
\n", + "
411.58
\n", + "
-85.60%
\n", + "
\n", + "
59.27
\n", + "
411.58
\n", + "
-85.60%
\n", + "
read bw %95\n", + "
173.05
\n", + "
484.54
\n", + "
-64.29%
\n", + "
\n", + "
173.05
\n", + "
484.54
\n", + "
-64.29%
\n", + "
read bw max\n", + "
236.70
\n", + "
510.35
\n", + "
-53.62%
\n", + "
\n", + "
236.70
\n", + "
510.35
\n", + "
-53.62%
\n", + "
time_rd_morethan_95\n", + "
0.05
\n", + "
0.03
\n", + "
41.96%
\n", + "
\n", + "
0.05
\n", + "
0.03
\n", + "
41.96%
\n", + "
write bw %75\n", + "
0.07
\n", + "
1.07
\n", + "
-93.45%
\n", + "
\n", + "
0.07
\n", + "
1.07
\n", + "
-93.45%
\n", + "
write bw %95\n", + "
1.23
\n", + "
165.69
\n", + "
-99.25%
\n", + "
\n", + "
1.23
\n", + "
165.69
\n", + "
-99.25%
\n", + "
write bw max\n", + "
1.70
\n", + "
812.51
\n", + "
-99.79%
\n", + "
\n", + "
1.70
\n", + "
812.51
\n", + "
-99.79%
\n", + "
time_wr_morethan_95\n", + "
0.00
\n", + "
0.03
\n", + "
-100.00%
\n", + "
\n", + "
0.00
\n", + "
0.03
\n", + "
-100.00%
\n", + "
cached mean\n", + "
88.33
\n", + "
93.90
\n", + "
-5.93%
\n", + "
\n", + "
88.33
\n", + "
93.90
\n", + "
-5.93%
\n", + "
cached 75%\n", + "
132.00
\n", + "
145.00
\n", + "
-8.97%
\n", + "
\n", + "
132.00
\n", + "
145.00
\n", + "
-8.97%
\n", + "
cached max\n", + "
160.00
\n", + "
188.00
\n", + "
-14.89%
\n", + "
\n", + "
160.00
\n", + "
188.00
\n", + "
-14.89%
\n", + "
used mean\n", + "
2,060.73
\n", + "
834.00
\n", + "
147.09%
\n", + "
\n", + "
2,060.73
\n", + "
834.00
\n", + "
147.09%
\n", + "
used 75%\n", + "
2,343.00
\n", + "
852.00
\n", + "
175.00%
\n", + "
\n", + "
2,343.00
\n", + "
852.00
\n", + "
175.00%
\n", + "
used max\n", + "
2,346.00
\n", + "
859.00
\n", + "
173.11%
\n", + "
\n", + "
2,346.00
\n", + "
859.00
\n", + "
173.11%
\n", + "
rx MB/s 75%\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
rx MB/s 95%\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
rx MB/s 99%\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
\n", + "
0.00
\n", + "
0.00
\n", + "
nan%
\n", + "
pgin mean\n", + "
37.37
\n", + "
190.21
\n", + "
-80.35%
\n", + "
\n", + "
37.37
\n", + "
190.21
\n", + "
-80.35%
\n", + "
pgin 75%\n", + "
59.00
\n", + "
412.00
\n", + "
-85.68%
\n", + "
\n", + "
59.00
\n", + "
412.00
\n", + "
-85.68%
\n", + "
pgin max\n", + "
352.00
\n", + "
509.00
\n", + "
-30.84%
\n", + "
\n", + "
352.00
\n", + "
509.00
\n", + "
-30.84%
\n", + "
pgout mean\n", + "
0.13
\n", + "
40.97
\n", + "
-99.68%
\n", + "
\n", + "
0.13
\n", + "
40.97
\n", + "
-99.68%
\n", + "
pgout 75%\n", + "
0.00
\n", + "
1.00
\n", + "
-100.00%
\n", + "
\n", + "
0.00
\n", + "
1.00
\n", + "
-100.00%
\n", + "
pgout max\n", + "
2.00
\n", + "
840.00
\n", + "
-99.76%
\n", + "
\n", + "
2.00
\n", + "
840.00
\n", + "
-99.76%
\n", + "
fault mean\n", + "
952,586.87
\n", + "
117,653.31
\n", + "
709.66%
\n", + "
\n", + "
952,586.87
\n", + "
117,653.31
\n", + "
709.66%
\n", + "
fault 75%\n", + "
1,426,717.00
\n", + "
205,151.00
\n", + "
595.45%
\n", + "
\n", + "
1,426,717.00
\n", + "
205,151.00
\n", + "
595.45%
\n", + "
fault max\n", + "
2,628,392.00
\n", + "
256,538.00
\n", + "
924.56%
\n", + "
\n", + "
2,628,392.00
\n", + "
256,538.00
\n", + "
924.56%
\n", + "
\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clientsr217agg
cpu%_avg\n", + "
0.88
\n", + "
0.45
\n", + "
96.45%
\n", + "
\n", + "
0.88
\n", + "
0.45
\n", + "
96.45%
\n", + "
cpu freq_avg\n", + "
3,460.22
\n", + "
3,241.92
\n", + "
6.73%
\n", + "
\n", + "
3,460.22
\n", + "
3,241.92
\n", + "
6.73%
\n", + "
pathlength_sum\n", + "
17,960.00
\n", + "
1,933.00
\n", + "
829.13%
\n", + "
\n", + "
17,960.00
\n", + "
1,933.00
\n", + "
829.13%
\n", + "
ipc_avg\n", + "
1.27
\n", + "
1.14
\n", + "
11.30%
\n", + "
\n", + "
1.27
\n", + "
1.14
\n", + "
11.30%
\n", + "
\n", + "\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 indexStage IDJob IDreal_queryidqueryidtotal_timestdev_timeacc_totaltotal
008818127.981.9199.65%99.65%
11109180.29nan99.87%0.23%
221210180.09nan99.94%0.07%
331511180.07nan100.00%0.06%
\n", + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 indexStage IDJob IDreal_queryidqueryidtotal_timestdev_timeacc_totaltotal
00881813.860.3286.65%86.65%
111210180.98nan92.80%6.15%
22109180.74nan97.43%4.63%
331511180.41nan100.00%2.57%
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if comp_appid:\n", + " comp_app=Application_Run(comp_appid,basedir=comp_base_dir)\n", + " output=app.compare_app(rapp=comp_app,show_metric=emonmetric,show_queryplan_diff=False,disk_prefix=disk_prefix,nic_prefix=nic_prefix)\n", + " display(HTML(output))" + ] + }, + { + "cell_type": "markdown", + "id": "572607be", + "metadata": { + "papermill": { + "duration": 0.019224, + "end_time": "2024-12-06T05:56:57.140390", + "exception": false, + "start_time": "2024-12-06T05:56:57.121166", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Config compare" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "0b4f3632", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:57.179356Z", + "iopub.status.busy": "2024-12-06T05:56:57.179070Z", + "iopub.status.idle": "2024-12-06T05:56:58.328465Z", + "shell.execute_reply": "2024-12-06T05:56:58.327997Z" + }, + "papermill": { + "duration": 1.170805, + "end_time": "2024-12-06T05:56:58.330214", + "exception": false, + "start_time": "2024-12-06T05:56:57.159409", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0851_00480851_0029comp
callSite.shortcollect at /tmp/ipykernel_265482/1936321720.py:117collect at /tmp/ipykernel_234307/1936321720.py:117False
spark.app.submitTime17334643016691733457038427False
spark.executor.extraClassPathfile:///data0/home/sparkuser/jars/6600a164407ae0e4f5ea5b33dc4b902f23a27730/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-snapshot.jarFalse
spark.executor.extraJavaOptions-xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/home/sparkuser/logs/java/hs_err_pid%p.log-xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/data0/home/sparkuser/logs/java/hs_err_pid%p.logFalse
spark.executor.memory10944m29184mFalse
spark.gluten.memory.conservative.task.offHeap.size.in.bytes10041163776NaNFalse
spark.gluten.memory.dynamic.offHeap.sizing.enabledfalseNaNFalse
spark.gluten.memory.offHeap.size.in.bytes80329310208NaNFalse
spark.gluten.memory.overAcquiredMemoryRatio0NaNFalse
spark.gluten.memory.task.offHeap.size.in.bytes20082327552NaNFalse
spark.gluten.memoryOverhead.size.in.bytes1073741824NaNFalse
spark.gluten.numTaskSlotsPerExecutor4NaNFalse
spark.gluten.sql.columnar.backend.libveloxNaNFalse
spark.gluten.sql.columnar.coalesce.batchestrueNaNFalse
spark.gluten.sql.columnar.forceshuffledhashjointrueNaNFalse
spark.gluten.sql.columnar.maxBatchSize4096NaNFalse
spark.gluten.sql.columnar.shuffle.codeclz4NaNFalse
spark.gluten.sql.columnar.shuffle.codecBackendNaNFalse
spark.gluten.sql.session.timeZone.defaultetc/utcNaNFalse
spark.memory.offHeap.size8032931020858368mFalse
spark.pluginsorg.apache.gluten.glutenpluginNaNFalse
spark.repl.class.outputDir/tmp/tmpypqh85b0/tmp/tmpynceqaxdFalse
spark.repl.class.urispark://sr213:40521/classesspark://sr213:34951/classesFalse
spark.shuffle.managerorg.apache.spark.shuffle.sort.columnarshufflemanagerNaNFalse
spark.sql.adaptive.customCostEvaluatorClassorg.apache.spark.sql.execution.adaptive.glutencostevaluatorNaNFalse
spark.sql.extensionsorg.apache.gluten.extension.glutensessionextensionsNaNFalse
spark.sql.files.maxPartitionBytes4gNaNFalse
spark.sql.shuffle.partitions3264False
\n", + "
" + ], + "text/plain": [ + " 0851_0048 \\\n", + "callSite.short collect at /tmp/ipykernel_265482/1936321720.py:117 \n", + "spark.app.submitTime 1733464301669 \n", + "spark.executor.extraClassPath file:///data0/home/sparkuser/jars/6600a164407ae0e4f5ea5b33dc4b902f23a27730/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.3.0-snapshot.jar \n", + "spark.executor.extraJavaOptions -xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/home/sparkuser/logs/java/hs_err_pid%p.log \n", + "spark.executor.memory 10944m \n", + "spark.gluten.memory.conservative.task.offHeap.size.in.bytes 10041163776 \n", + "spark.gluten.memory.dynamic.offHeap.sizing.enabled false \n", + "spark.gluten.memory.offHeap.size.in.bytes 80329310208 \n", + "spark.gluten.memory.overAcquiredMemoryRatio 0 \n", + "spark.gluten.memory.task.offHeap.size.in.bytes 20082327552 \n", + "spark.gluten.memoryOverhead.size.in.bytes 1073741824 \n", + "spark.gluten.numTaskSlotsPerExecutor 4 \n", + "spark.gluten.sql.columnar.backend.lib velox \n", + "spark.gluten.sql.columnar.coalesce.batches true \n", + "spark.gluten.sql.columnar.forceshuffledhashjoin true \n", + 
"spark.gluten.sql.columnar.maxBatchSize 4096 \n", + "spark.gluten.sql.columnar.shuffle.codec lz4 \n", + "spark.gluten.sql.columnar.shuffle.codecBackend \n", + "spark.gluten.sql.session.timeZone.default etc/utc \n", + "spark.memory.offHeap.size 80329310208 \n", + "spark.plugins org.apache.gluten.glutenplugin \n", + "spark.repl.class.outputDir /tmp/tmpypqh85b0 \n", + "spark.repl.class.uri spark://sr213:40521/classes \n", + "spark.shuffle.manager org.apache.spark.shuffle.sort.columnarshufflemanager \n", + "spark.sql.adaptive.customCostEvaluatorClass org.apache.spark.sql.execution.adaptive.glutencostevaluator \n", + "spark.sql.extensions org.apache.gluten.extension.glutensessionextensions \n", + "spark.sql.files.maxPartitionBytes 4g \n", + "spark.sql.shuffle.partitions 32 \n", + "\n", + " 0851_0029 \\\n", + "callSite.short collect at /tmp/ipykernel_234307/1936321720.py:117 \n", + "spark.app.submitTime 1733457038427 \n", + "spark.executor.extraClassPath \n", + "spark.executor.extraJavaOptions -xx:+ignoreunrecognizedvmoptions --add-opens=java.base/java.lang=all-unnamed --add-opens=java.base/java.lang.invoke=all-unnamed --add-opens=java.base/java.lang.reflect=all-unnamed --add-opens=java.base/java.io=all-unnamed --add-opens=java.base/java.net=all-unnamed --add-opens=java.base/java.nio=all-unnamed --add-opens=java.base/java.util=all-unnamed --add-opens=java.base/java.util.concurrent=all-unnamed --add-opens=java.base/java.util.concurrent.atomic=all-unnamed --add-opens=java.base/sun.nio.ch=all-unnamed --add-opens=java.base/sun.nio.cs=all-unnamed --add-opens=java.base/sun.security.action=all-unnamed --add-opens=java.base/sun.util.calendar=all-unnamed --add-opens=java.security.jgss/sun.security.krb5=all-unnamed -xx:+useparalleloldgc -xx:parallelgcthreads=2 -xx:newratio=1 -xx:survivorratio=1 -xx:+usecompressedoops -verbose:gc -xx:+printgcdetails -xx:+printgctimestamps -xx:errorfile=/data0/home/sparkuser/logs/java/hs_err_pid%p.log \n", + "spark.executor.memory 29184m \n", + 
"spark.gluten.memory.conservative.task.offHeap.size.in.bytes NaN \n", + "spark.gluten.memory.dynamic.offHeap.sizing.enabled NaN \n", + "spark.gluten.memory.offHeap.size.in.bytes NaN \n", + "spark.gluten.memory.overAcquiredMemoryRatio NaN \n", + "spark.gluten.memory.task.offHeap.size.in.bytes NaN \n", + "spark.gluten.memoryOverhead.size.in.bytes NaN \n", + "spark.gluten.numTaskSlotsPerExecutor NaN \n", + "spark.gluten.sql.columnar.backend.lib NaN \n", + "spark.gluten.sql.columnar.coalesce.batches NaN \n", + "spark.gluten.sql.columnar.forceshuffledhashjoin NaN \n", + "spark.gluten.sql.columnar.maxBatchSize NaN \n", + "spark.gluten.sql.columnar.shuffle.codec NaN \n", + "spark.gluten.sql.columnar.shuffle.codecBackend NaN \n", + "spark.gluten.sql.session.timeZone.default NaN \n", + "spark.memory.offHeap.size 58368m \n", + "spark.plugins NaN \n", + "spark.repl.class.outputDir /tmp/tmpynceqaxd \n", + "spark.repl.class.uri spark://sr213:34951/classes \n", + "spark.shuffle.manager NaN \n", + "spark.sql.adaptive.customCostEvaluatorClass NaN \n", + "spark.sql.extensions NaN \n", + "spark.sql.files.maxPartitionBytes NaN \n", + "spark.sql.shuffle.partitions 64 \n", + "\n", + " comp \n", + "callSite.short False \n", + "spark.app.submitTime False \n", + "spark.executor.extraClassPath False \n", + "spark.executor.extraJavaOptions False \n", + "spark.executor.memory False \n", + "spark.gluten.memory.conservative.task.offHeap.size.in.bytes False \n", + "spark.gluten.memory.dynamic.offHeap.sizing.enabled False \n", + "spark.gluten.memory.offHeap.size.in.bytes False \n", + "spark.gluten.memory.overAcquiredMemoryRatio False \n", + "spark.gluten.memory.task.offHeap.size.in.bytes False \n", + "spark.gluten.memoryOverhead.size.in.bytes False \n", + "spark.gluten.numTaskSlotsPerExecutor False \n", + "spark.gluten.sql.columnar.backend.lib False \n", + "spark.gluten.sql.columnar.coalesce.batches False \n", + "spark.gluten.sql.columnar.forceshuffledhashjoin False \n", + 
"spark.gluten.sql.columnar.maxBatchSize False \n", + "spark.gluten.sql.columnar.shuffle.codec False \n", + "spark.gluten.sql.columnar.shuffle.codecBackend False \n", + "spark.gluten.sql.session.timeZone.default False \n", + "spark.memory.offHeap.size False \n", + "spark.plugins False \n", + "spark.repl.class.outputDir False \n", + "spark.repl.class.uri False \n", + "spark.shuffle.manager False \n", + "spark.sql.adaptive.customCostEvaluatorClass False \n", + "spark.sql.extensions False \n", + "spark.sql.files.maxPartitionBytes False \n", + "spark.sql.shuffle.partitions False " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if comp_appid:\n", + " comp_appals=comp_app.analysis['app']['als']\n", + " display(comp_spark_conf(appals, comp_appals))" + ] + }, + { + "cell_type": "markdown", + "id": "20b5f6f2", + "metadata": { + "papermill": { + "duration": 0.020157, + "end_time": "2024-12-06T05:56:58.371233", + "exception": false, + "start_time": "2024-12-06T05:56:58.351076", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Convert to HTML" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "bd866a20", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:58.412619Z", + "iopub.status.busy": "2024-12-06T05:56:58.412337Z", + "iopub.status.idle": "2024-12-06T05:56:58.416007Z", + "shell.execute_reply": "2024-12-06T05:56:58.415586Z" + }, + "papermill": { + "duration": 0.025916, + "end_time": "2024-12-06T05:56:58.417156", + "exception": false, + "start_time": "2024-12-06T05:56:58.391240", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%javascript\n", + "IPython.notebook.kernel.execute('nb_name = \"' + 
IPython.notebook.notebook_name + '\"')" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "83323888", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:58.459405Z", + "iopub.status.busy": "2024-12-06T05:56:58.459137Z", + "iopub.status.idle": "2024-12-06T05:56:58.461591Z", + "shell.execute_reply": "2024-12-06T05:56:58.461165Z" + }, + "papermill": { + "duration": 0.024889, + "end_time": "2024-12-06T05:56:58.462703", + "exception": false, + "start_time": "2024-12-06T05:56:58.437814", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# htmlname=nb_name.replace(\"ipynb\",\"html\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "98b1ba3b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-12-06T05:56:58.505858Z", + "iopub.status.busy": "2024-12-06T05:56:58.505587Z", + "iopub.status.idle": "2024-12-06T05:56:58.508041Z", + "shell.execute_reply": "2024-12-06T05:56:58.507614Z" + }, + "papermill": { + "duration": 0.024884, + "end_time": "2024-12-06T05:56:58.509167", + "exception": false, + "start_time": "2024-12-06T05:56:58.484283", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# !jupyter nbconvert --to html ./{nb_name} --no-input --output html/{htmlname} --template classic" + ] + } + ], + "metadata": { + "celltoolbar": "Tags", + "hide_input": false, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "nbTranslate": { + "displayLangs": [ + "*" + ], + "hotkey": "alt-t", + "langInMainMenu": true, + "sourceLang": "en", + "targetLang": "fr", + "useGoogleTranslate": true + }, + "papermill": { + "default_parameters": {}, + 
"duration": 207.873445, + "end_time": "2024-12-06T05:57:01.150405", + "environment_variables": {}, + "exception": null, + "input_path": "2024_12_06_055328_tpch_gluten_application_1733153225851_0048.ipynb", + "output_path": "2024_12_06_055328_tpch_gluten_application_1733153225851_0048.nbconvert.ipynb", + "parameters": { + "appid": "application_1733153225851_0048", + "base_dir": "sr213", + "comp_appid": "application_1733153225851_0029", + "comp_base_dir": "sr213", + "comp_name": "vanilla", + "disk": "nvme0n1", + "name": "tpch_gluten", + "nic": "enp61s0f0", + "proxy": "http://10.239.44.250:8080", + "tz": "Etc/GMT+0" + }, + "start_time": "2024-12-06T05:53:33.276960", + "version": "2.6.0" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": false, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": { + "height": "197px", + "left": "2188px", + "top": "111px", + "width": "269px" + }, + "toc_section_display": true, + "toc_window_display": true + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/tools/workload/benchmark_velox/sample/trace_result_tpch_q1.json b/tools/workload/benchmark_velox/sample/trace_result_tpch_q1.json new file mode 100644 index 000000000000..0ca3b995a26c --- /dev/null +++ b/tools/workload/benchmark_velox/sample/trace_result_tpch_q1.json @@ -0,0 +1,776 
@@ + + { + "traceEvents": [ + + {"name": "process_name", "ph": "M", "pid": 100300, "tid": 0, "args": {"name": "sr217.3"}}, +{"tid": 100300, "ts": -32615, "dur": 1647, "pid": 100300, "ph": "X", "name": "stg0", "args": {"job id": 0, "stage id": 0, "tskid": 0, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"name": "process_name", "ph": "M", "pid": 100200, "tid": 0, "args": {"name": "sr217.2"}}, +{"tid": 100200, "ts": -29154, "dur": 1639, "pid": 100200, "ph": "X", "name": "stg1", "args": {"job id": 1, "stage id": 1, "tskid": 1, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100300, "ts": -27419, "dur": 53, "pid": 100300, "ph": "X", "name": "stg2", "args": {"job id": 2, "stage id": 2, "tskid": 2, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100200, "ts": -27286, "dur": 51, "pid": 100200, "ph": "X", "name": "stg3", "args": {"job id": 3, "stage id": 3, "tskid": 3, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"name": "process_name", "ph": "M", "pid": 100400, "tid": 0, "args": {"name": "sr217.4"}}, +{"tid": 100400, "ts": -27151, "dur": 1554, "pid": 100400, "ph": "X", "name": "stg4", "args": {"job id": 4, "stage id": 4, "tskid": 4, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"name": "process_name", "ph": "M", "pid": 100100, "tid": 0, "args": {"name": "sr217.1"}}, +{"tid": 100100, "ts": -25511, "dur": 1641, "pid": 100100, "ph": "X", "name": "stg5", "args": {"job id": 5, "stage id": 5, "tskid": 5, "input": 0.0, "spill": 0.0, 
"Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100400, "ts": -23791, "dur": 48, "pid": 100400, "ph": "X", "name": "stg6", "args": {"job id": 6, "stage id": 6, "tskid": 6, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100100, "ts": -23672, "dur": 53, "pid": 100100, "ph": "X", "name": "stg7", "args": {"job id": 7, "stage id": 7, "tskid": 7, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100302, "ts": -22513, "dur": 13342, "pid": 100300, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 21, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100303, "ts": -22513, "dur": 13346, "pid": 100300, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 17, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100203, "ts": -22512, "dur": 13580, "pid": 100200, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 23, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100202, "ts": -22513, "dur": 13590, "pid": 100200, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 19, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100201, "ts": -22514, "dur": 13650, "pid": 100200, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 11, 
"input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100301, "ts": -22514, "dur": 13681, "pid": 100300, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 13, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100300, "ts": -22515, "dur": 13686, "pid": 100300, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 9, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100103, "ts": -22513, "dur": 13732, "pid": 100100, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 18, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100200, "ts": -22514, "dur": 13745, "pid": 100200, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 15, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100100, "ts": -22514, "dur": 13757, "pid": 100100, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 14, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100102, "ts": -22513, "dur": 13846, "pid": 100100, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 22, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100400, "ts": -22517, "dur": 14209, "pid": 100400, "ph": "X", "name": "stg8", 
"args": {"job id": 8, "stage id": 8, "tskid": 8, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100403, "ts": -22513, "dur": 14262, "pid": 100400, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 16, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100101, "ts": -22514, "dur": 14303, "pid": 100100, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 10, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100401, "ts": -22514, "dur": 14336, "pid": 100400, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 12, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100402, "ts": -22513, "dur": 14339, "pid": 100400, "ph": "X", "name": "stg8", "args": {"job id": 8, "stage id": 8, "tskid": 20, "input": 315.73, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100300, "ts": -7787, "dur": 744, "pid": 100300, "ph": "X", "name": "stg10", "args": {"job id": 9, "stage id": 10, "tskid": 24, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.03, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100400, "ts": -7009, "dur": 980, "pid": 100400, "ph": "X", "name": "stg12", "args": {"job id": 10, "stage id": 12, "tskid": 25, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.03, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid": 100200, "ts": -5889, "dur": 451, "pid": 
100200, "ph": "X", "name": "stg15", "args": {"job id": 11, "stage id": 15, "tskid": 26, "input": 0.0, "spill": 0.0, "Shuffle Read Metrics": "", "|---Local Read": 0.0, "|---Remote Read": 0.0, "Shuffle Write Metrics": "", "|---Write": 0.0}}, +{"tid":38,"ts":-32614,"dur":1646,"pid":99999,"ph":"X","name":"stg0","args":{"taskid":0,"exec_id":3,"host":"sr217"}}, +{"tid":38,"ts":-29153,"dur":1638,"pid":99999,"ph":"X","name":"stg1","args":{"taskid":1,"exec_id":2,"host":"sr217"}}, +{"tid":38,"ts":-27418,"dur":52,"pid":99999,"ph":"X","name":"stg2","args":{"taskid":2,"exec_id":3,"host":"sr217"}}, +{"tid":38,"ts":-27285,"dur":50,"pid":99999,"ph":"X","name":"stg3","args":{"taskid":3,"exec_id":2,"host":"sr217"}}, +{"tid":38,"ts":-27150,"dur":1553,"pid":99999,"ph":"X","name":"stg4","args":{"taskid":4,"exec_id":4,"host":"sr217"}}, +{"tid":38,"ts":-25510,"dur":1640,"pid":99999,"ph":"X","name":"stg5","args":{"taskid":5,"exec_id":1,"host":"sr217"}}, +{"tid":38,"ts":-23790,"dur":47,"pid":99999,"ph":"X","name":"stg6","args":{"taskid":6,"exec_id":4,"host":"sr217"}}, +{"tid":38,"ts":-23671,"dur":52,"pid":99999,"ph":"X","name":"stg7","args":{"taskid":7,"exec_id":1,"host":"sr217"}}, +{"tid":38,"ts":-22512,"dur":14338,"pid":99999,"ph":"X","name":"stg8","args":{"taskid":20,"exec_id":4,"host":"sr217"}}, +{"tid":38,"ts":-7786,"dur":743,"pid":99999,"ph":"X","name":"stg10","args":{"taskid":24,"exec_id":3,"host":"sr217"}}, +{"tid":38,"ts":-7008,"dur":979,"pid":99999,"ph":"X","name":"stg12","args":{"taskid":25,"exec_id":4,"host":"sr217"}}, +{"tid":38,"ts":-5888,"dur":450,"pid":99999,"ph":"X","name":"stg15","args":{"taskid":26,"exec_id":2,"host":"sr217"}}, +{"tid":38,"ts":-32615,"dur":8996,"pid":99999,"ph":"X","name":"qry0"}, +{"tid":38,"ts":-22517,"dur":17079,"pid":99999,"ph":"X","name":"qry1"}, +{"tid":38,"ts":-22512,"dur":7552,"pid":99999,"ph":"X","name":"time of scan and filter"}, +{"tid":38,"ts":-14959,"dur":6166,"pid":99999,"ph":"X","name":"time of project"}, 
+{"tid":38,"ts":-8792,"dur":32,"pid":99999,"ph":"X","name":"time of aggregation"}, +{"tid":38,"ts":-7786,"dur":21,"pid":99999,"ph":"X","name":"time of input iterator"}, +{"tid":38,"ts":-7008,"dur":20,"pid":99999,"ph":"X","name":"time of input iterator"}, +{"tid":38,"ts":-5888,"dur":11,"pid":99999,"ph":"X","name":"time of input iterator"}, +{"name": "process_name", "ph": "M", "pid": 99999, "tid": 0, "args": {"name": "critical path"}}, +{"name": "process_name", "ph": "M", "pid": 0, "tid": 0, "args": {"name": " sr217"}}, +{"tid":0,"ts":-34362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":2,"system":1,"iowait":0}}, +{"tid":0,"ts":-33362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":0,"system":0,"iowait":0}}, +{"tid":0,"ts":-32362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":11,"system":1,"iowait":0}}, +{"tid":0,"ts":-31362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":9,"system":0,"iowait":0}}, +{"tid":0,"ts":-30362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":0,"system":0,"iowait":0}}, +{"tid":0,"ts":-29362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":3,"system":0,"iowait":0}}, +{"tid":0,"ts":-28362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":14,"system":0,"iowait":0}}, +{"tid":0,"ts":-27362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":8,"system":1,"iowait":0}}, +{"tid":0,"ts":-26362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":15,"system":0,"iowait":0}}, +{"tid":0,"ts":-25362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":9,"system":1,"iowait":1}}, +{"tid":0,"ts":-24362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":12,"system":1,"iowait":2}}, +{"tid":0,"ts":-23362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":4,"system":0,"iowait":0}}, +{"tid":0,"ts":-22362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":41,"system":2,"iowait":0}}, +{"tid":0,"ts":-21362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":69,"system":10,"iowait":1}}, +{"tid":0,"ts":-20362,"pid":0,"ph":"C","name":"all 
cpu%","args":{"user":69,"system":10,"iowait":1}}, +{"tid":0,"ts":-19362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":81,"system":8,"iowait":1}}, +{"tid":0,"ts":-18362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":84,"system":5,"iowait":0}}, +{"tid":0,"ts":-17362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":73,"system":8,"iowait":1}}, +{"tid":0,"ts":-16362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":76,"system":9,"iowait":1}}, +{"tid":0,"ts":-15362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":78,"system":8,"iowait":1}}, +{"tid":0,"ts":-14362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":77,"system":9,"iowait":0}}, +{"tid":0,"ts":-13362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":79,"system":9,"iowait":1}}, +{"tid":0,"ts":-12362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":83,"system":7,"iowait":0}}, +{"tid":0,"ts":-11362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":77,"system":9,"iowait":0}}, +{"tid":0,"ts":-10362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":76,"system":10,"iowait":1}}, +{"tid":0,"ts":-9362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":78,"system":8,"iowait":0}}, +{"tid":0,"ts":-8362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":26,"system":2,"iowait":0}}, +{"tid":0,"ts":-7362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":12,"system":0,"iowait":0}}, +{"tid":0,"ts":-6362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":13,"system":0,"iowait":0}}, +{"tid":0,"ts":-5362,"pid":0,"ph":"C","name":"all cpu%","args":{"user":7,"system":1,"iowait":0}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 0, "args": {"sort_index ": 0}}, +{"name": "process_name", "ph": "M", "pid": 0, "tid": 0, "args": {"name": " sr217"}}, +{"tid":3,"ts":-34362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-33362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-32362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":1}}, 
+{"tid":3,"ts":-31362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-30362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-29362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":1}}, +{"tid":3,"ts":-28362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-27362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":165}}, +{"tid":3,"ts":-26362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-25362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":505}}, +{"tid":3,"ts":-24362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":512}}, +{"tid":3,"ts":-23362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":0}}, +{"tid":3,"ts":-22362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":71,"write":1}}, +{"tid":3,"ts":-21362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":463,"write":0}}, +{"tid":3,"ts":-20362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":483,"write":0}}, +{"tid":3,"ts":-19362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":405,"write":0}}, +{"tid":3,"ts":-18362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":218,"write":0}}, +{"tid":3,"ts":-17362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":407,"write":0}}, +{"tid":3,"ts":-16362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":489,"write":0}}, +{"tid":3,"ts":-15362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":481,"write":0}}, +{"tid":3,"ts":-14362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":482,"write":0}}, +{"tid":3,"ts":-13362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":425,"write":0}}, +{"tid":3,"ts":-12362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":348,"write":0}}, +{"tid":3,"ts":-11362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":458,"write":0}}, +{"tid":3,"ts":-10362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":486,"write":0}}, +{"tid":3,"ts":-9362,"pid":0,"ph":"C","name":"disk 
b/w","args":{"read":282,"write":0}}, +{"tid":3,"ts":-8362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":49,"write":0}}, +{"tid":3,"ts":-7362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":1}}, +{"tid":3,"ts":-6362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":1}}, +{"tid":3,"ts":-5362,"pid":0,"ph":"C","name":"disk b/w","args":{"read":0,"write":1}}, +{"tid":4,"ts":-34362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-33362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-32362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-31362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-30362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-29362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-28362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-27362,"pid":0,"ph":"C","name":"disk%","args":{"%util":16.0}}, +{"tid":4,"ts":-26362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-25362,"pid":0,"ph":"C","name":"disk%","args":{"%util":44.0}}, +{"tid":4,"ts":-24362,"pid":0,"ph":"C","name":"disk%","args":{"%util":47.0}}, +{"tid":4,"ts":-23362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-22362,"pid":0,"ph":"C","name":"disk%","args":{"%util":8.0}}, +{"tid":4,"ts":-21362,"pid":0,"ph":"C","name":"disk%","args":{"%util":53.0}}, +{"tid":4,"ts":-20362,"pid":0,"ph":"C","name":"disk%","args":{"%util":65.0}}, +{"tid":4,"ts":-19362,"pid":0,"ph":"C","name":"disk%","args":{"%util":48.0}}, +{"tid":4,"ts":-18362,"pid":0,"ph":"C","name":"disk%","args":{"%util":40.0}}, +{"tid":4,"ts":-17362,"pid":0,"ph":"C","name":"disk%","args":{"%util":50.0}}, +{"tid":4,"ts":-16362,"pid":0,"ph":"C","name":"disk%","args":{"%util":67.0}}, +{"tid":4,"ts":-15362,"pid":0,"ph":"C","name":"disk%","args":{"%util":71.0}}, 
+{"tid":4,"ts":-14362,"pid":0,"ph":"C","name":"disk%","args":{"%util":75.0}}, +{"tid":4,"ts":-13362,"pid":0,"ph":"C","name":"disk%","args":{"%util":56.0}}, +{"tid":4,"ts":-12362,"pid":0,"ph":"C","name":"disk%","args":{"%util":77.0}}, +{"tid":4,"ts":-11362,"pid":0,"ph":"C","name":"disk%","args":{"%util":72.0}}, +{"tid":4,"ts":-10362,"pid":0,"ph":"C","name":"disk%","args":{"%util":79.0}}, +{"tid":4,"ts":-9362,"pid":0,"ph":"C","name":"disk%","args":{"%util":55.0}}, +{"tid":4,"ts":-8362,"pid":0,"ph":"C","name":"disk%","args":{"%util":19.0}}, +{"tid":4,"ts":-7362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-6362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":4,"ts":-5362,"pid":0,"ph":"C","name":"disk%","args":{"%util":0.0}}, +{"tid":5,"ts":-34362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":0.0}}, +{"tid":5,"ts":-33362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":0.0}}, +{"tid":5,"ts":-32362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":102.93}}, +{"tid":5,"ts":-31362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":12.92}}, +{"tid":5,"ts":-30362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":0.0}}, +{"tid":5,"ts":-29362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":110.0}}, +{"tid":5,"ts":-28362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":0.0}}, +{"tid":5,"ts":-27362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":120.43}}, +{"tid":5,"ts":-26362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":0.0}}, +{"tid":5,"ts":-25362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":114.69}}, +{"tid":5,"ts":-24362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":127.54}}, +{"tid":5,"ts":-23362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":6.0}}, +{"tid":5,"ts":-22362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":78.52}}, +{"tid":5,"ts":-21362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":85.83}}, 
+{"tid":5,"ts":-20362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.81}}, +{"tid":5,"ts":-19362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.37}}, +{"tid":5,"ts":-18362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":82.5}}, +{"tid":5,"ts":-17362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":84.67}}, +{"tid":5,"ts":-16362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.64}}, +{"tid":5,"ts":-15362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.49}}, +{"tid":5,"ts":-14362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.78}}, +{"tid":5,"ts":-13362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.36}}, +{"tid":5,"ts":-12362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.82}}, +{"tid":5,"ts":-11362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.66}}, +{"tid":5,"ts":-10362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":83.41}}, +{"tid":5,"ts":-9362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":82.6}}, +{"tid":5,"ts":-8362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":79.92}}, +{"tid":5,"ts":-7362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":122.22}}, +{"tid":5,"ts":-6362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":122.22}}, +{"tid":5,"ts":-5362,"pid":0,"ph":"C","name":"req size","args":{"avgrq-sz":122.22}}, +{"tid":6,"ts":-34362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-33362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-32362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-31362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-30362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-29362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-28362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, 
+{"tid":6,"ts":-27362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":2.8}}, +{"tid":6,"ts":-26362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-25362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":132.74}}, +{"tid":6,"ts":-24362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":65.4}}, +{"tid":6,"ts":-23362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-22362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":1.46}}, +{"tid":6,"ts":-21362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":7.8}}, +{"tid":6,"ts":-20362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":6.26}}, +{"tid":6,"ts":-19362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":4.68}}, +{"tid":6,"ts":-18362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":2.44}}, +{"tid":6,"ts":-17362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":6.5}}, +{"tid":6,"ts":-16362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":6.0}}, +{"tid":6,"ts":-15362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":5.34}}, +{"tid":6,"ts":-14362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":5.02}}, +{"tid":6,"ts":-13362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":4.84}}, +{"tid":6,"ts":-12362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":3.08}}, +{"tid":6,"ts":-11362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":4.6}}, +{"tid":6,"ts":-10362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":4.96}}, +{"tid":6,"ts":-9362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":3.0}}, +{"tid":6,"ts":-8362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.46}}, +{"tid":6,"ts":-7362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-6362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, +{"tid":6,"ts":-5362,"pid":0,"ph":"C","name":"queue size","args":{"avgqu-sz":0.0}}, 
+{"tid":7,"ts":-34362,"pid":0,"ph":"C","name":"await","args":{"await":0.0}}, +{"tid":7,"ts":-33362,"pid":0,"ph":"C","name":"await","args":{"await":0.0}}, +{"tid":7,"ts":-32362,"pid":0,"ph":"C","name":"await","args":{"await":0.13}}, +{"tid":7,"ts":-31362,"pid":0,"ph":"C","name":"await","args":{"await":0.23}}, +{"tid":7,"ts":-30362,"pid":0,"ph":"C","name":"await","args":{"await":0.0}}, +{"tid":7,"ts":-29362,"pid":0,"ph":"C","name":"await","args":{"await":0.14}}, +{"tid":7,"ts":-28362,"pid":0,"ph":"C","name":"await","args":{"await":0.0}}, +{"tid":7,"ts":-27362,"pid":0,"ph":"C","name":"await","args":{"await":0.99}}, +{"tid":7,"ts":-26362,"pid":0,"ph":"C","name":"await","args":{"await":0.0}}, +{"tid":7,"ts":-25362,"pid":0,"ph":"C","name":"await","args":{"await":14.7}}, +{"tid":7,"ts":-24362,"pid":0,"ph":"C","name":"await","args":{"await":7.94}}, +{"tid":7,"ts":-23362,"pid":0,"ph":"C","name":"await","args":{"await":0.17}}, +{"tid":7,"ts":-22362,"pid":0,"ph":"C","name":"await","args":{"await":0.78}}, +{"tid":7,"ts":-21362,"pid":0,"ph":"C","name":"await","args":{"await":0.71}}, +{"tid":7,"ts":-20362,"pid":0,"ph":"C","name":"await","args":{"await":0.53}}, +{"tid":7,"ts":-19362,"pid":0,"ph":"C","name":"await","args":{"await":0.47}}, +{"tid":7,"ts":-18362,"pid":0,"ph":"C","name":"await","args":{"await":0.45}}, +{"tid":7,"ts":-17362,"pid":0,"ph":"C","name":"await","args":{"await":0.66}}, +{"tid":7,"ts":-16362,"pid":0,"ph":"C","name":"await","args":{"await":0.5}}, +{"tid":7,"ts":-15362,"pid":0,"ph":"C","name":"await","args":{"await":0.45}}, +{"tid":7,"ts":-14362,"pid":0,"ph":"C","name":"await","args":{"await":0.42}}, +{"tid":7,"ts":-13362,"pid":0,"ph":"C","name":"await","args":{"await":0.46}}, +{"tid":7,"ts":-12362,"pid":0,"ph":"C","name":"await","args":{"await":0.36}}, +{"tid":7,"ts":-11362,"pid":0,"ph":"C","name":"await","args":{"await":0.41}}, +{"tid":7,"ts":-10362,"pid":0,"ph":"C","name":"await","args":{"await":0.41}}, 
+{"tid":7,"ts":-9362,"pid":0,"ph":"C","name":"await","args":{"await":0.43}}, +{"tid":7,"ts":-8362,"pid":0,"ph":"C","name":"await","args":{"await":0.37}}, +{"tid":7,"ts":-7362,"pid":0,"ph":"C","name":"await","args":{"await":0.22}}, +{"tid":7,"ts":-6362,"pid":0,"ph":"C","name":"await","args":{"await":0.22}}, +{"tid":7,"ts":-5362,"pid":0,"ph":"C","name":"await","args":{"await":0.22}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 3, "args": {"sort_index ": 3}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 4, "args": {"sort_index ": 4}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 5, "args": {"sort_index ": 5}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 6, "args": {"sort_index ": 6}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 7, "args": {"sort_index ": 7}}, +{"name": "process_name", "ph": "M", "pid": 0, "tid": 0, "args": {"name": " sr217"}}, +{"tid":1,"ts":-34362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-33362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-32362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-31362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-30362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-29362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-28362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-27362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-26362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-25362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-24362,"pid":0,"ph":"C","name":"mem % 
","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-23362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-22362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-21362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-20362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-19362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-18362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-17362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":0,"buffered":0,"used":8}}, +{"tid":1,"ts":-16362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-15362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-14362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-13362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-12362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-11362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-10362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-9362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-8362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-7362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-6362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":1,"ts":-5362,"pid":0,"ph":"C","name":"mem % ","args":{"cached":1,"buffered":0,"used":8}}, +{"tid":2,"ts":-34362,"pid":0,"ph":"C","name":"pagecache % 
","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-33362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-32362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-31362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-30362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-29362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-28362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-27362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-26362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-25362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-24362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-23362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-22362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-21362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-20362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-19362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-18362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-17362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":0,"dirty":0}}, +{"tid":2,"ts":-16362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-15362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-14362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-13362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, 
+{"tid":2,"ts":-12362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-11362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-10362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-9362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-8362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-7362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-6362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"tid":2,"ts":-5362,"pid":0,"ph":"C","name":"pagecache % ","args":{"clean":1,"dirty":0}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 1, "args": {"sort_index ": 1}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 2, "args": {"sort_index ": 2}}, +{"name": "process_name", "ph": "M", "pid": 0, "tid": 0, "args": {"name": " sr217"}}, +{"tid":10,"ts":-34362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-33362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-32362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-31362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-30362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-29362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-28362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-27362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-26362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-25362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-24362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-23362,"pid":0,"ph":"C","name":"lo 
","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-22362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":53,"txmb/s":53}}, +{"tid":10,"ts":-21362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":850,"txmb/s":850}}, +{"tid":10,"ts":-20362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":888,"txmb/s":888}}, +{"tid":10,"ts":-19362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":633,"txmb/s":633}}, +{"tid":10,"ts":-18362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":435,"txmb/s":435}}, +{"tid":10,"ts":-17362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":708,"txmb/s":708}}, +{"tid":10,"ts":-16362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":865,"txmb/s":865}}, +{"tid":10,"ts":-15362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":876,"txmb/s":876}}, +{"tid":10,"ts":-14362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":893,"txmb/s":893}}, +{"tid":10,"ts":-13362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":747,"txmb/s":747}}, +{"tid":10,"ts":-12362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":731,"txmb/s":731}}, +{"tid":10,"ts":-11362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":889,"txmb/s":889}}, +{"tid":10,"ts":-10362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":964,"txmb/s":964}}, +{"tid":10,"ts":-9362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":643,"txmb/s":643}}, +{"tid":10,"ts":-8362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":157,"txmb/s":157}}, +{"tid":10,"ts":-7362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-6362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"tid":10,"ts":-5362,"pid":0,"ph":"C","name":"lo ","args":{"rxmb/s":0,"txmb/s":0}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 8, "args": {"sort_index ": 8}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 9, "args": {"sort_index ": 9}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 10, "args": {"sort_index ": 10}}, +{"name": "process_name", "ph": "M", "pid": 0, "tid": 0, "args": {"name": " sr217"}}, 
+{"tid":11,"ts":-34362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-33362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-32362,"pid":0,"ph":"C","name":"page inout","args":{"in":1,"out":1}}, +{"tid":11,"ts":-31362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-30362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-29362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":1}}, +{"tid":11,"ts":-28362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-27362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":165}}, +{"tid":11,"ts":-26362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-25362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":546}}, +{"tid":11,"ts":-24362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":471}}, +{"tid":11,"ts":-23362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":0}}, +{"tid":11,"ts":-22362,"pid":0,"ph":"C","name":"page inout","args":{"in":73,"out":1}}, +{"tid":11,"ts":-21362,"pid":0,"ph":"C","name":"page inout","args":{"in":463,"out":0}}, +{"tid":11,"ts":-20362,"pid":0,"ph":"C","name":"page inout","args":{"in":483,"out":0}}, +{"tid":11,"ts":-19362,"pid":0,"ph":"C","name":"page inout","args":{"in":404,"out":0}}, +{"tid":11,"ts":-18362,"pid":0,"ph":"C","name":"page inout","args":{"in":218,"out":0}}, +{"tid":11,"ts":-17362,"pid":0,"ph":"C","name":"page inout","args":{"in":408,"out":0}}, +{"tid":11,"ts":-16362,"pid":0,"ph":"C","name":"page inout","args":{"in":489,"out":0}}, +{"tid":11,"ts":-15362,"pid":0,"ph":"C","name":"page inout","args":{"in":481,"out":0}}, +{"tid":11,"ts":-14362,"pid":0,"ph":"C","name":"page inout","args":{"in":482,"out":0}}, +{"tid":11,"ts":-13362,"pid":0,"ph":"C","name":"page inout","args":{"in":424,"out":0}}, +{"tid":11,"ts":-12362,"pid":0,"ph":"C","name":"page 
inout","args":{"in":348,"out":0}}, +{"tid":11,"ts":-11362,"pid":0,"ph":"C","name":"page inout","args":{"in":458,"out":4}}, +{"tid":11,"ts":-10362,"pid":0,"ph":"C","name":"page inout","args":{"in":485,"out":0}}, +{"tid":11,"ts":-9362,"pid":0,"ph":"C","name":"page inout","args":{"in":282,"out":0}}, +{"tid":11,"ts":-8362,"pid":0,"ph":"C","name":"page inout","args":{"in":49,"out":0}}, +{"tid":11,"ts":-7362,"pid":0,"ph":"C","name":"page inout","args":{"in":1,"out":1}}, +{"tid":11,"ts":-6362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":1}}, +{"tid":11,"ts":-5362,"pid":0,"ph":"C","name":"page inout","args":{"in":0,"out":1}}, +{"tid":12,"ts":-34362,"pid":0,"ph":"C","name":"faults","args":{"major":3,"minor":34023}}, +{"tid":12,"ts":-33362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":1354}}, +{"tid":12,"ts":-32362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":50712}}, +{"tid":12,"ts":-31362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":25536}}, +{"tid":12,"ts":-30362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":161}}, +{"tid":12,"ts":-29362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":25359}}, +{"tid":12,"ts":-28362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":49819}}, +{"tid":12,"ts":-27362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":17304}}, +{"tid":12,"ts":-26362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":40662}}, +{"tid":12,"ts":-25362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":16519}}, +{"tid":12,"ts":-24362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":42377}}, +{"tid":12,"ts":-23362,"pid":0,"ph":"C","name":"faults","args":{"major":1,"minor":22581}}, +{"tid":12,"ts":-22362,"pid":0,"ph":"C","name":"faults","args":{"major":1,"minor":72419}}, +{"tid":12,"ts":-21362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":243832}}, 
+{"tid":12,"ts":-20362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":216635}}, +{"tid":12,"ts":-19362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":205818}}, +{"tid":12,"ts":-18362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":144239}}, +{"tid":12,"ts":-17362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":179561}}, +{"tid":12,"ts":-16362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":221768}}, +{"tid":12,"ts":-15362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":206784}}, +{"tid":12,"ts":-14362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":204236}}, +{"tid":12,"ts":-13362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":222004}}, +{"tid":12,"ts":-12362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":175002}}, +{"tid":12,"ts":-11362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":249102}}, +{"tid":12,"ts":-10362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":267591}}, +{"tid":12,"ts":-9362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":185914}}, +{"tid":12,"ts":-8362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":51858}}, +{"tid":12,"ts":-7362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":48755}}, +{"tid":12,"ts":-6362,"pid":0,"ph":"C","name":"faults","args":{"major":0,"minor":67785}}, +{"tid":12,"ts":-5362,"pid":0,"ph":"C","name":"faults","args":{"major":2,"minor":41314}}, +{"tid":13,"ts":-34362,"pid":0,"ph":"C","name":"page free","args":{"free":104}}, +{"tid":13,"ts":-33362,"pid":0,"ph":"C","name":"page free","args":{"free":5}}, +{"tid":13,"ts":-32362,"pid":0,"ph":"C","name":"page free","args":{"free":13}}, +{"tid":13,"ts":-31362,"pid":0,"ph":"C","name":"page free","args":{"free":6}}, +{"tid":13,"ts":-30362,"pid":0,"ph":"C","name":"page free","args":{"free":0}}, +{"tid":13,"ts":-29362,"pid":0,"ph":"C","name":"page free","args":{"free":29}}, +{"tid":13,"ts":-28362,"pid":0,"ph":"C","name":"page 
free","args":{"free":40}}, +{"tid":13,"ts":-27362,"pid":0,"ph":"C","name":"page free","args":{"free":24}}, +{"tid":13,"ts":-26362,"pid":0,"ph":"C","name":"page free","args":{"free":11}}, +{"tid":13,"ts":-25362,"pid":0,"ph":"C","name":"page free","args":{"free":19}}, +{"tid":13,"ts":-24362,"pid":0,"ph":"C","name":"page free","args":{"free":20}}, +{"tid":13,"ts":-23362,"pid":0,"ph":"C","name":"page free","args":{"free":9}}, +{"tid":13,"ts":-22362,"pid":0,"ph":"C","name":"page free","args":{"free":39}}, +{"tid":13,"ts":-21362,"pid":0,"ph":"C","name":"page free","args":{"free":287}}, +{"tid":13,"ts":-20362,"pid":0,"ph":"C","name":"page free","args":{"free":579}}, +{"tid":13,"ts":-19362,"pid":0,"ph":"C","name":"page free","args":{"free":685}}, +{"tid":13,"ts":-18362,"pid":0,"ph":"C","name":"page free","args":{"free":681}}, +{"tid":13,"ts":-17362,"pid":0,"ph":"C","name":"page free","args":{"free":460}}, +{"tid":13,"ts":-16362,"pid":0,"ph":"C","name":"page free","args":{"free":664}}, +{"tid":13,"ts":-15362,"pid":0,"ph":"C","name":"page free","args":{"free":680}}, +{"tid":13,"ts":-14362,"pid":0,"ph":"C","name":"page free","args":{"free":677}}, +{"tid":13,"ts":-13362,"pid":0,"ph":"C","name":"page free","args":{"free":664}}, +{"tid":13,"ts":-12362,"pid":0,"ph":"C","name":"page free","args":{"free":664}}, +{"tid":13,"ts":-11362,"pid":0,"ph":"C","name":"page free","args":{"free":630}}, +{"tid":13,"ts":-10362,"pid":0,"ph":"C","name":"page free","args":{"free":702}}, +{"tid":13,"ts":-9362,"pid":0,"ph":"C","name":"page free","args":{"free":701}}, +{"tid":13,"ts":-8362,"pid":0,"ph":"C","name":"page free","args":{"free":255}}, +{"tid":13,"ts":-7362,"pid":0,"ph":"C","name":"page free","args":{"free":28}}, +{"tid":13,"ts":-6362,"pid":0,"ph":"C","name":"page free","args":{"free":55}}, +{"tid":13,"ts":-5362,"pid":0,"ph":"C","name":"page free","args":{"free":53}}, +{"tid":14,"ts":-34362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, 
+{"tid":14,"ts":-33362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-32362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-31362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-30362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-29362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-28362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-27362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-26362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-25362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-24362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-23362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-22362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-21362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-20362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-19362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-18362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-17362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-16362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-15362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-14362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-13362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-12362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-11362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, 
+{"tid":14,"ts":-10362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-9362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-8362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-7362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-6362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":14,"ts":-5362,"pid":0,"ph":"C","name":"scan","args":{"kernel":0,"app":0}}, +{"tid":15,"ts":-34362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-33362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-32362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-31362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-30362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-29362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-28362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-27362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-26362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-25362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-24362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-23362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-22362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-21362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-20362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-19362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-18362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-17362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-16362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, 
+{"tid":15,"ts":-15362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-14362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-13362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-12362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-11362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-10362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-9362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-8362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-7362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-6362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"tid":15,"ts":-5362,"pid":0,"ph":"C","name":"vmeff","args":{"steal":0}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 11, "args": {"sort_index ": 11}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 12, "args": {"sort_index ": 12}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 13, "args": {"sort_index ": 13}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 14, "args": {"sort_index ": 14}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 15, "args": {"sort_index ": 15}}, +{"name": "thread_sort_index", "ph": "M", "pid": 0, "tid": 16, "args": {"sort_index ": 16}}, +{"name": "process_name", "ph": "M", "pid": 200, "tid": 0, "args": {"name": " sr217"}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-33062,"args":{"cpu%":0.004}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-32562,"args":{"cpu%":0.036}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-32062,"args":{"cpu%":0.123}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-31561,"args":{"cpu%":0.193}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-31061,"args":{"cpu%":0.091}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-30561,"args":{"cpu%":0.018}}, 
+{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-30060,"args":{"cpu%":0.003}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-29560,"args":{"cpu%":0.003}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-29060,"args":{"cpu%":0.049}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-28559,"args":{"cpu%":0.156}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-28059,"args":{"cpu%":0.158}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-27559,"args":{"cpu%":0.102}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-27058,"args":{"cpu%":0.097}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-26558,"args":{"cpu%":0.16}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-26058,"args":{"cpu%":0.184}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-25557,"args":{"cpu%":0.08}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-25057,"args":{"cpu%":0.124}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-24557,"args":{"cpu%":0.219}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-24056,"args":{"cpu%":0.101}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-23556,"args":{"cpu%":0.095}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-23055,"args":{"cpu%":0.026}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-22555,"args":{"cpu%":0.003}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-22055,"args":{"cpu%":0.721}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-21551,"args":{"cpu%":0.689}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-21051,"args":{"cpu%":0.856}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-20551,"args":{"cpu%":0.976}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-20049,"args":{"cpu%":0.688}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-19549,"args":{"cpu%":0.96}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-19049,"args":{"cpu%":0.864}}, 
+{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-18549,"args":{"cpu%":0.821}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-18047,"args":{"cpu%":0.96}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-17547,"args":{"cpu%":0.664}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-17047,"args":{"cpu%":0.958}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-16543,"args":{"cpu%":0.874}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-16039,"args":{"cpu%":0.857}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-15539,"args":{"cpu%":0.953}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-15035,"args":{"cpu%":0.802}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-14535,"args":{"cpu%":0.94}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-14035,"args":{"cpu%":0.821}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-13533,"args":{"cpu%":0.959}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-13033,"args":{"cpu%":0.846}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-12533,"args":{"cpu%":0.852}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-12031,"args":{"cpu%":0.948}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-11529,"args":{"cpu%":0.818}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-11027,"args":{"cpu%":0.922}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-10527,"args":{"cpu%":0.911}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-10027,"args":{"cpu%":0.855}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-9526,"args":{"cpu%":0.926}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-9026,"args":{"cpu%":0.798}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-8526,"args":{"cpu%":0.557}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-8026,"args":{"cpu%":0.21}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-7525,"args":{"cpu%":0.114}}, 
+{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-7025,"args":{"cpu%":0.113}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-6525,"args":{"cpu%":0.241}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-6024,"args":{"cpu%":0.114}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-5524,"args":{"cpu%":0.14}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-5024,"args":{"cpu%":0.047}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-4524,"args":{"cpu%":0.031}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-4023,"args":{"cpu%":0.037}}, +{"tid":0,"pid":200,"ph":"C","name":"emon_cpuutil","ts":-3523,"args":{"cpu%":0.051}}, +{"name": "thread_sort_index", "ph": "M", "pid": 200, "tid": 0, "args": {"sort_index ": 0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-33062,"args":{"cpu freq":1484.899}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-32562,"args":{"cpu freq":3490.27}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-32062,"args":{"cpu freq":3506.32}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-31561,"args":{"cpu freq":3527.429}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-31061,"args":{"cpu freq":3561.722}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-30561,"args":{"cpu freq":3319.664}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-30060,"args":{"cpu freq":1379.214}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-29560,"args":{"cpu freq":1890.723}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-29060,"args":{"cpu freq":3482.122}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-28559,"args":{"cpu freq":3513.709}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-28059,"args":{"cpu freq":3531.37}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-27559,"args":{"cpu freq":3537.36}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-27058,"args":{"cpu freq":3328.559}}, 
+{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-26558,"args":{"cpu freq":3507.392}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-26058,"args":{"cpu freq":3526.368}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-25557,"args":{"cpu freq":3544.389}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-25057,"args":{"cpu freq":3461.059}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-24557,"args":{"cpu freq":3485.67}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-24056,"args":{"cpu freq":3519.72}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-23556,"args":{"cpu freq":3411.889}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-23055,"args":{"cpu freq":3411.17}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-22555,"args":{"cpu freq":1626.804}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-22055,"args":{"cpu freq":3497.99}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-21551,"args":{"cpu freq":3494.923}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-21051,"args":{"cpu freq":3500.409}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-20551,"args":{"cpu freq":3500.0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-20049,"args":{"cpu freq":3500.053}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-19549,"args":{"cpu freq":3500.0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-19049,"args":{"cpu freq":3499.993}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-18549,"args":{"cpu freq":3498.202}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-18047,"args":{"cpu freq":3499.996}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-17547,"args":{"cpu freq":3489.839}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-17047,"args":{"cpu freq":3500.0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-16543,"args":{"cpu freq":3500.001}}, 
+{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-16039,"args":{"cpu freq":3500.034}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-15539,"args":{"cpu freq":3500.0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-15035,"args":{"cpu freq":3500.045}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-14535,"args":{"cpu freq":3499.994}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-14035,"args":{"cpu freq":3500.181}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-13533,"args":{"cpu freq":3500.0}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-13033,"args":{"cpu freq":3495.654}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-12533,"args":{"cpu freq":3499.996}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-12031,"args":{"cpu freq":3499.987}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-11529,"args":{"cpu freq":3499.992}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-11027,"args":{"cpu freq":3500.001}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-10527,"args":{"cpu freq":3499.992}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-10027,"args":{"cpu freq":3499.986}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-9526,"args":{"cpu freq":3499.986}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-9026,"args":{"cpu freq":3499.806}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-8526,"args":{"cpu freq":3475.321}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-8026,"args":{"cpu freq":3329.652}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-7525,"args":{"cpu freq":3485.31}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-7025,"args":{"cpu freq":3516.153}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-6525,"args":{"cpu freq":3488.135}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-6024,"args":{"cpu freq":3530.858}}, 
+{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-5524,"args":{"cpu freq":3483.958}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-5024,"args":{"cpu freq":3002.176}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-4524,"args":{"cpu freq":2303.263}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-4023,"args":{"cpu freq":2380.255}}, +{"tid":1,"pid":200,"ph":"C","name":"emon_cpufreq","ts":-3523,"args":{"cpu freq":3051.839}}, +{"name": "thread_sort_index", "ph": "M", "pid": 200, "tid": 1, "args": {"sort_index ": 1}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-33062,"args":{"pathlength":0.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-32562,"args":{"pathlength":2.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-32062,"args":{"pathlength":7.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-31561,"args":{"pathlength":12.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-31061,"args":{"pathlength":6.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-30561,"args":{"pathlength":1.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-30060,"args":{"pathlength":0.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-29560,"args":{"pathlength":0.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-29060,"args":{"pathlength":3.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-28559,"args":{"pathlength":9.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-28059,"args":{"pathlength":10.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-27559,"args":{"pathlength":7.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-27058,"args":{"pathlength":5.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-26558,"args":{"pathlength":10.0}}, 
+{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-26058,"args":{"pathlength":12.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-25557,"args":{"pathlength":5.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-25057,"args":{"pathlength":7.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-24557,"args":{"pathlength":14.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-24056,"args":{"pathlength":6.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-23556,"args":{"pathlength":5.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-23055,"args":{"pathlength":2.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-22555,"args":{"pathlength":0.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-22055,"args":{"pathlength":35.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-21551,"args":{"pathlength":29.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-21051,"args":{"pathlength":63.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-20551,"args":{"pathlength":77.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-20049,"args":{"pathlength":34.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-19549,"args":{"pathlength":77.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-19049,"args":{"pathlength":63.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-18549,"args":{"pathlength":60.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-18047,"args":{"pathlength":76.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-17547,"args":{"pathlength":38.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-17047,"args":{"pathlength":77.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-16543,"args":{"pathlength":64.0}}, 
+{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-16039,"args":{"pathlength":59.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-15539,"args":{"pathlength":75.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-15035,"args":{"pathlength":59.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-14535,"args":{"pathlength":76.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-14035,"args":{"pathlength":60.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-13533,"args":{"pathlength":77.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-13033,"args":{"pathlength":58.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-12533,"args":{"pathlength":64.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-12031,"args":{"pathlength":75.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-11529,"args":{"pathlength":60.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-11027,"args":{"pathlength":73.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-10527,"args":{"pathlength":67.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-10027,"args":{"pathlength":65.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-9526,"args":{"pathlength":74.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-9026,"args":{"pathlength":55.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-8526,"args":{"pathlength":47.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-8026,"args":{"pathlength":18.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-7525,"args":{"pathlength":6.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-7025,"args":{"pathlength":7.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-6525,"args":{"pathlength":14.0}}, 
+{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-6024,"args":{"pathlength":7.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-5524,"args":{"pathlength":8.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-5024,"args":{"pathlength":2.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-4524,"args":{"pathlength":1.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-4023,"args":{"pathlength":2.0}}, +{"tid":2,"pid":200,"ph":"C","name":"emon_instr_retired","ts":-3523,"args":{"pathlength":3.0}}, +{"name": "thread_sort_index", "ph": "M", "pid": 200, "tid": 2, "args": {"sort_index ": 2}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-33062,"args":{"ipc":0.366}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-32562,"args":{"ipc":1.114}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-32062,"args":{"ipc":1.035}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-31561,"args":{"ipc":1.127}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-31061,"args":{"ipc":1.159}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-30561,"args":{"ipc":1.101}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-30060,"args":{"ipc":0.333}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-29560,"args":{"ipc":1.057}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-29060,"args":{"ipc":1.02}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-28559,"args":{"ipc":1.042}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-28059,"args":{"ipc":1.161}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-27559,"args":{"ipc":1.135}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-27058,"args":{"ipc":0.911}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-26558,"args":{"ipc":1.078}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-26058,"args":{"ipc":1.131}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-25557,"args":{"ipc":1.191}}, 
+{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-25057,"args":{"ipc":1.022}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-24557,"args":{"ipc":1.108}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-24056,"args":{"ipc":1.111}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-23556,"args":{"ipc":1.054}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-23055,"args":{"ipc":1.128}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-22555,"args":{"ipc":0.754}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-22055,"args":{"ipc":0.858}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-21551,"args":{"ipc":0.761}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-21051,"args":{"ipc":1.315}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-20551,"args":{"ipc":1.402}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-20049,"args":{"ipc":0.884}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-19549,"args":{"ipc":1.43}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-19049,"args":{"ipc":1.301}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-18549,"args":{"ipc":1.306}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-18047,"args":{"ipc":1.414}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-17547,"args":{"ipc":1.03}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-17047,"args":{"ipc":1.436}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-16543,"args":{"ipc":1.302}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-16039,"args":{"ipc":1.227}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-15539,"args":{"ipc":1.411}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-15035,"args":{"ipc":1.311}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-14535,"args":{"ipc":1.436}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-14035,"args":{"ipc":1.294}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-13533,"args":{"ipc":1.429}}, 
+{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-13033,"args":{"ipc":1.227}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-12533,"args":{"ipc":1.334}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-12031,"args":{"ipc":1.41}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-11529,"args":{"ipc":1.307}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-11027,"args":{"ipc":1.409}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-10527,"args":{"ipc":1.313}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-10027,"args":{"ipc":1.35}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-9526,"args":{"ipc":1.422}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-9026,"args":{"ipc":1.232}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-8526,"args":{"ipc":1.505}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-8026,"args":{"ipc":1.613}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-7525,"args":{"ipc":0.968}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-7025,"args":{"ipc":1.081}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-6525,"args":{"ipc":1.016}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-6024,"args":{"ipc":1.086}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-5524,"args":{"ipc":0.983}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-5024,"args":{"ipc":1.104}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-4524,"args":{"ipc":1.039}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-4023,"args":{"ipc":1.078}}, +{"tid":3,"pid":200,"ph":"C","name":"emon_ipc","ts":-3523,"args":{"ipc":1.176}}, +{"name": "thread_sort_index", "ph": "M", "pid": 200, "tid": 3, "args": {"sort_index ": 3}}, +{"name": "process_sort_index", "ph": "M", "pid": 0, "tid": 0, "args": {"sort_index ": 0}}, +{"name": "process_sort_index", "ph": "M", "pid": 100, "tid": 0, "args": {"sort_index ": 100}}, +{"name": "process_sort_index", "ph": "M", "pid": 200, "tid": 0, "args": {"sort_index ": 
200}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 
100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100300, "tid": 0, "args": {"sort_index ": 100300}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}}, +{"name": "process_sort_index", "ph": "M", "pid": 100500, "tid": 0, "args": {"sort_index ": 100500}}, +{"name": "process_sort_index", "ph": "M", "pid": 100600, "tid": 0, "args": {"sort_index ": 100600}}, +{"name": "process_sort_index", "ph": "M", "pid": 100400, "tid": 0, "args": {"sort_index ": 100400}} + ], + "displayTimeUnit": "ns" + } \ No newline at end of file diff --git a/tools/workload/benchmark_velox/tpc_workload.ipynb b/tools/workload/benchmark_velox/tpc_workload.ipynb index 5dcb50a8a066..c0232d1d52f6 100644 --- a/tools/workload/benchmark_velox/tpc_workload.ipynb +++ b/tools/workload/benchmark_velox/tpc_workload.ipynb @@ -35,22 +35,10 @@ "# List of network devices. e.g. ['ens787f0']\n", "nic_dev=[]\n", "\n", - "# Hostname or IP to server for perf analysis. Able to connect via ssh.\n", - "server=''\n", - "\n", - "# Specify the directory on perf analysis server. Usually a codename for this run.\n", - "base_dir=''\n", - "\n", - "# Proxy used to connect to server for perf analysis.\n", - "proxy=''\n", - "\n", - "# Whether to upload profile to perf analysis server and run perf analysis scripts. Only takes effect if server is set.\n", - "analyze_perf=True\n", - "\n", "# Select workload. Can be either 'tpch' or 'tpcds'.\n", "workload='tpch'\n", "\n", - "# Run with gluten. If False, run vanilla Spark.\n", + "# Run with gluten. 
If False, run Spark.\n", "run_gluten=True\n", "\n", "# TPC tables\n", @@ -59,30 +47,49 @@ "\n", "# Parallelism\n", "executors_per_node=32\n", - "cores_per_executor=8\n", + "cores_per_executor=7\n", "\n", "gluten_tpch_task_per_core=2\n", "gluten_tpcds_task_per_core=4\n", - "vanilla_tpch_task_per_core=8\n", - "vanilla_tpcds_task_per_core=8\n", + "spark_tpch_task_per_core=8\n", + "spark_tpcds_task_per_core=8\n", "\n", "# Physical memory on each worker node.\n", "memory_per_node='1000g'\n", "\n", - "# Offheap ratio. 0 to disable offheap for vanilla Spark.\n", + "# Offheap ratio. 0 to disable offheap for Spark.\n", "# onheap:offheap = 1:2\n", - "vanilla_offheap_ratio=2.0\n", + "spark_offheap_ratio=2.0\n", "# onheap:offheap = 1:7\n", "gluten_offheap_ratio=7.0\n", "\n", "# spark.io.compression.codec\n", - "vanilla_codec='lz4'\n", + "spark_codec='lz4'\n", "# spark.gluten.sql.columnar.shuffle.codec\n", "gluten_codec='lz4'\n", "# spark.gluten.sql.columnar.shuffle.codecBackend\n", "gluten_codec_backend=''\n", "# spark.gluten.sql.columnar.maxBatchSize\n", - "max_batch_size=4096" + "max_batch_size=4096\n", + "\n", + "# Hostname or IP to server for perf analysis. Able to connect via ssh.\n", + "server=''\n", + "\n", + "# Specify the directory on perf analysis server. Usually a codename for this run.\n", + "base_dir=''\n", + "\n", + "# Proxy used to connect to server for perf analysis.\n", + "proxy=''\n", + "\n", + "# Emon event file for `emon -i`. Set to empty string '' if emon is unavailable.\n", + "# Supported emon events on platform can be verified via `emon -i emon.list`\n", + "emon_list=''\n", + "\n", + "# Whether to run perf analysis scripts. 
Only takes effect if server is set.\n", + "analyze_perf=False\n", + "\n", + "# List of email addresses to receive perf analysis results.\n", + "emails = []" ] }, { @@ -176,8 +183,8 @@ " pass\n", " return conf\n", "\n", - "def vanilla_conf_overwrite(conf):\n", - " conf.set('spark.io.compression.codec', vanilla_codec)\\\n", + "def spark_conf_overwrite(conf):\n", + " conf.set('spark.io.compression.codec', spark_codec)\\\n", " .set('spark.executorEnv.LD_LIBRARY_PATH',f\"{os.getenv('HADOOP_HOME')}/lib/native/\") \\\n", " .set('spark.yarn.appMasterEnv.LD_LIBRARY_PATH',f\"{os.getenv('HADOOP_HOME')}/lib/native/\") \\\n", "\n", @@ -190,7 +197,7 @@ "def app_conf_overwrite(conf):\n", " if run_gluten:\n", " return gluten_conf_overwrite(conf)\n", - " return vanilla_conf_overwrite(conf)" + " return spark_conf_overwrite(conf)" ] }, { @@ -238,7 +245,7 @@ "metadata": {}, "outputs": [], "source": [ - "test_tpc.start_monitor(clients)" + "test_tpc.start_monitor(clients, emon_list=emon_list)" ] }, { @@ -266,7 +273,7 @@ "outputs": [], "source": [ "if analyze_perf:\n", - " test_tpc.run_perf_analysis(disk_dev, nic_dev)" + " test_tpc.run_perf_analysis(disk_dev, nic_dev, proxy, emails)" ] }, {