diff --git a/tools/kdse2023.ipynb b/tools/kdse2023.ipynb new file mode 100644 index 00000000..01925791 --- /dev/null +++ b/tools/kdse2023.ipynb @@ -0,0 +1,471 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "16551e7a", + "metadata": {}, + "source": [ + "# K2DSE Benchmarks\n", + "\n", + "This is a lab report of current experiments with KDSE*" + ] + }, + { + "cell_type": "markdown", + "id": "01a527cd", + "metadata": {}, + "source": [ + "## What do we need to show " + ] + }, + { + "cell_type": "markdown", + "id": "e1a5f05d", + "metadata": {}, + "source": [ + "### Goal 0 - re-implementation\n", + "\n", + "\n", + "First, I need to make sure my re-implementation is identical. Not trivial as we count SD differently." + ] + }, + { + "cell_type": "markdown", + "id": "d367b9ff", + "metadata": {}, + "source": [ + "#### 0.1 Check that the number of SD found is the same between OldKDSE,DKDSE,DKDSEA (the capstone implementation) and KDSE,K2DSE,K2DSEA (the paper re-implementation).\n", + "\n", + "✔ We are correct" + ] + }, + { + "cell_type": "markdown", + "id": "2f56dc30", + "metadata": {}, + "source": [ + "#### 0.2 Verify that the algorithm isn't slower\n", + "\n", + "✔ We are faster " + ] + }, + { + "cell_type": "markdown", + "id": "94536e86", + "metadata": {}, + "source": [ + "#### 0.3 Check that the threaded implementation is identical and also faster." + ] + }, + { + "cell_type": "markdown", + "id": "0022ffb4", + "metadata": {}, + "source": [ + "✔ The implementation can explore more points given the concurrency effect over the end of exploration.\n", + "On large instances, there is a clear speed-up (when available)." + ] + }, + { + "cell_type": "markdown", + "id": "0860d11f", + "metadata": {}, + "source": [ + "### Goal 1 - Execution Time and size of explored space by DSE methods. 
\n", + "\n", + "#### generate the table 1 with KDSE,K2DSE,K2DSEA (no multi-thread).\n", + "\n", + "\n", + "### Goal 2 - Pareto fronts and explored space \n", + "\n", + "#### generate the Fig 3 with K2DSE,K2DSEA,PDSE (no multi-thread)." + ] + }, + { + "cell_type": "markdown", + "id": "6a4beb5d", + "metadata": {}, + "source": [ + "## Prepare data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12ba0b92", + "metadata": {}, + "outputs": [], + "source": [ + "import dsereader\n", + "\n", + "logdir = \"../kdse2023_log/\"\n", + "\n", + "\n", + "applications = {\n", + " \"bipartite\" : { \"name\" : \"bipartite\" },\n", + " \"Echo\" : { \"name\" : \"Echo\" },\n", + " \"fig8\" : { \"name\" : \"fig8\" },\n", + " \"H264\" : { \"name\" : \"H264\" },\n", + " \"modem\" : { \"name\" : \"modem\" },\n", + " \"sample\" : { \"name\" : \"sample\" },\n", + " \"satellite\" : { \"name\" : \"satellite\" },\n", + " \"BlackScholes\" : { \"name\" : \"BlackScholes\" },\n", + " \"example\" : { \"name\" : \"example\" },\n", + " \"h263decoder\" : { \"name\" : \"h263decoder\" },\n", + " \"JPEG2000\" : { \"name\" : \"JPEG2000\" },\n", + " \"PDectect\" : { \"name\" : \"PDectect\" },\n", + " \"samplerate\" : { \"name\" : \"samplerate\" }\n", + "}\n", + "\n", + "methods = {\n", + " # 0 Infos\n", + " # 1 Throughput \n", + " # 2 : { \"name\" : \"OldKDSE\" , \"color\" : \"black\"}, # \"-aKPeriodicThroughputwithDSE\"\n", + " # 3 : { \"name\" : \"DeepKDSE\" , \"color\" : \"red\"}, # \"-aDeepKPeriodicThroughputwithDSE\"\n", + " # 4 : { \"name\" : \"DeepKDSEA\" , \"color\" : \"green\"}, # \"-aDeepKPeriodicThroughputwithDSE -papprox=1\"\n", + " # 5 : { \"name\" : \"KDSE\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=KDSE\"\n", + " 2 : { \"name\" : \"K2DSE\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE\"\n", + " 3 : { \"name\" : \"K2DSEA\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 
import math
import datetime

import pandas as pd


def time_in_msec(time_msec):
    """Format a duration given in milliseconds as a human-readable string.

    Args:
        time_msec: Duration in milliseconds (int or float). Any fractional
            millisecond is truncated for the formatted part, but the raw
            value is echoed unchanged between parentheses.

    Returns:
        A string such as ``"0days 0h 1min 40.000sec (100000.1)"``.
    """
    whole_msec = int(time_msec)
    delta = datetime.timedelta(milliseconds=whole_msec)
    # timedelta normalises to (days, seconds < 86400, microseconds), so the
    # hour/minute/second split is plain integer arithmetic on delta.seconds.
    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Use modulo (not bitwise AND) for the millisecond part, zero-padded so
    # that 5 ms reads ".005" rather than ".5". delta.days is reported in full
    # so durations longer than a week are not truncated.
    return "{}days {}h {}min {}.{:03d}sec ({})".format(
        delta.days, hours, minutes, seconds, whole_msec % 1000, time_msec
    )


time_in_msec(100000.10)


def gen_dse_data(logdir, applications, methods, columns=None):
    """Summarise the DSE logs of every (application, method) pair.

    Args:
        logdir: Directory handed to ``dsereader.load_app_dse``.
        applications: Mapping ``key -> {"name", "max_throughput", "task_count"}``.
        methods: Mapping ``method key -> {"name": ...}``.
        columns: Columns requested from ``dsereader.load_app_dse``. Defaults to
            throughput, storage distribution size and cumulative duration.

    Returns:
        A ``pandas.DataFrame`` with one row per (application, method) and the
        columns ``graph``, ``#task``, ``method``, ``#SD``, ``#Pareto``,
        ``Duration`` and ``Finished``. Pairs whose log is missing, or whose log
        has no ``throughput`` column, get ``"-"`` placeholders and
        ``Finished == False``.
    """
    if columns is None:
        columns = ["throughput", "storage distribution size", "cumulative duration"]

    rows = []
    for app, values in applications.items():
        app_name = values["name"]
        app_max_throughput = values["max_throughput"]
        app_task_count = values["task_count"]
        for m in methods:
            # Start from the "no data" row and fill it in as data is found.
            row = {
                "graph": app_name,
                "#task": app_task_count,
                "method": methods[m]["name"],
                "#SD": "-",
                "#Pareto": "-",
                "Duration": "-",
                "Finished": False,
            }
            rows.append(row)

            try:
                df = dsereader.load_app_dse(logdir, app, m, cols=columns)
            except FileNotFoundError:
                continue  # this method was not run for this application
            if "throughput" not in df:
                continue

            max_th = df["throughput"].max()
            print(app, m, max_th, app_max_throughput)
            # The exploration is complete once it reached the known maximum throughput.
            finished = math.isclose(max_th, app_max_throughput, rel_tol=1e-5)

            row["#SD"] = df["storage distribution size"].count()
            if "cumulative duration" in df:
                row["Duration"] = df["cumulative duration"].max()
            row["Finished"] = finished
            if finished:
                # The Pareto count is only reported for completed explorations.
                pareto = dsereader.extract_pareto(df[["throughput", "storage distribution size"]])
                row["#Pareto"] = pareto["storage distribution size"].count()

    return pd.DataFrame(rows)
"source": [ + "df = gen_dse_data(logdir, applications=applications, methods=methods)\n", + "df.set_index([\"graph\",\"#task\",\"method\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "db0f5f7c", + "metadata": {}, + "outputs": [], + "source": [ + "colformat = \"|\".join([\"\"] + [\"l\"] * df.index.nlevels + [\"r\"] * df.shape[1] + [\"\"])\n", + " \n", + "latex = df.to_latex(\n", + " float_format=\"{:0.1f}\".format # , column_format=colformat, index=False\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "8c65c8ba", + "metadata": {}, + "source": [ + "# Check new implementation" + ] + }, + { + "cell_type": "markdown", + "id": "ddf8fb3e", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "36a8254a", + "metadata": {}, + "source": [ + "## 0.1 Check the correctness of the new algorithm\n", + "\n", + "❌ We explore less for fig8,and \n", + "❌ We explore more for BlackScholes. This is due to a difference in OldKDSE when they initialize the first SD. they are correct with this particular app, but their init could be wrong. 
We stick with the current one; it can be improved later.\n", + "❌ There are strange artefacts when looking at sample output from OldKDSE: it sets buffers to values higher than required at initialization. This is a bug in OldKDSE.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da53a31f", + "metadata": {}, + "outputs": [], + "source": [ + "methods_to_compare = [\"OldKDSE\", \"KDSE\"]\n", + "\n", + "df = gen_dse_data(logdir, applications=applications, methods=methods)\n", + "subdf = df[df[\"method\"].isin(methods_to_compare)]\n", + "subdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20e17338", + "metadata": {}, + "outputs": [], + "source": [ + "methods_to_compare = [\"DeepKDSE\", \"K2DSE\"]\n", + "\n", + "df = gen_dse_data(logdir, applications=applications, methods=methods)\n", + "subdf = df[df[\"method\"].isin(methods_to_compare)]\n", + "subdf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b46e8ce", + "metadata": {}, + "outputs": [], + "source": [ + "methods_to_compare = [\"DeepKDSEA\", \"K2DSEA\"]\n", + "\n", + "df = gen_dse_data(logdir, applications=applications, methods=methods)\n", + "subdf = df[df[\"method\"].isin(methods_to_compare)]\n", + "subdf" + ] + }, + { + "cell_type": "markdown", + "id": "c24b54ae", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "e537ab48", + "metadata": {}, + "source": [ + "## 0.2 and 0.3 Check non-threaded and threaded versions are faster\n", + "\n", + "When effective we gain one order of magnitude.\n", + "When not, we lose a few seconds maximum.\n", + "On my machine 16 is too much." 
def sanity_check(logdir, applications, methods):
    """Check every available DSE log for completeness and duplicate points.

    For each (application, method) pair whose log can be loaded, verify that
    the exploration reached the application's maximum throughput and that no
    "feedback quantities" configuration was logged twice. Pairs whose log is
    missing (FileNotFoundError) or cannot be loaded with the requested columns
    (ValueError) are skipped.

    Args:
        logdir: Directory handed to ``dsereader.load_app_dse``.
        applications: Mapping ``app key -> {"max_throughput": ..., ...}``.
        methods: Mapping ``method key -> {"name": ...}``.

    Returns:
        True when every loaded log passes both checks.

    Raises:
        AssertionError: On the first log that is unfinished or contains
            duplicates; the message names the application and the method.
    """
    columns = [
        "throughput",
        "storage distribution size",
        "cumulative duration",
        "feedback quantities",
    ]
    for app_key, app_values in applications.items():
        for method_key, method_values in methods.items():
            try:
                df = dsereader.load_app_dse(logdir, app_key, method_key, cols=columns)
            except (FileNotFoundError, ValueError):
                continue

            max_th = df["throughput"].max()
            app_max_throughput = app_values["max_throughput"]
            finished = math.isclose(max_th, app_max_throughput, rel_tol=1e-5)
            duplicates_count = int(df["feedback quantities"].duplicated().sum())

            # Report before asserting, so that a failing pair is visible in the
            # cell output instead of being hidden behind a bare AssertionError.
            print(app_key, method_key, finished, duplicates_count)

            label = "{}/{}".format(app_key, method_values["name"])
            assert finished, (
                f"{label}: exploration did not finish "
                f"(max throughput {max_th}, expected {app_max_throughput})"
            )
            assert duplicates_count == 0, (
                f"{label}: {duplicates_count} duplicated configuration(s) explored"
            )

    return True
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tools/kdse2023_aspdac.ipynb b/tools/kdse2023_aspdac.ipynb new file mode 100644 index 00000000..83bee664 --- /dev/null +++ b/tools/kdse2023_aspdac.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f56dc30", + "metadata": {}, + "source": [ + "# K2DSE Benchmarks\n", + "\n", + "## What do we need to show \n", + "\n", + "\n", + "### Goal 0 - re-implementation\n", + "\n", + "First, I need to make sure my re-implementation is identical. Not trivial as we count SD differently.\n", + "\n", + "#### 0.1 Check that the number of SD found is the same between OldKDSE,DKDSE,DKDSEA (the capstone implementation) and KDSE,K2DSE,K2DSEA (the paper re-implementation).\n", + "\n", + "#### 0.2 Verify that the algorithm isn't slower" + ] + }, + { + "cell_type": "markdown", + "id": "9062f80a", + "metadata": {}, + "source": [ + "We are faster on small instances, need to verify on larger ones." + ] + }, + { + "cell_type": "markdown", + "id": "94536e86", + "metadata": {}, + "source": [ + "#### 0.3 Check that the threaded implementation is identical and also faster." + ] + }, + { + "cell_type": "markdown", + "id": "0022ffb4", + "metadata": {}, + "source": [ + "So far I have found duplicates, so this implementation is not going to be used" + ] + }, + { + "cell_type": "markdown", + "id": "0860d11f", + "metadata": {}, + "source": [ + "### Goal 1 - Execution Time and size of explored space by DSE methods. 
\n", + "\n", + "#### generate the table 1 with KDSE,K2DSE,K2DSEA (no multi-thread).\n", + "\n", + "\n", + "### Goal 2 - Pareto fronts and explored space \n", + "\n", + "#### generate the Fig 3 with K2DSE,K2DSEA,PDSE (no multi-thread)." + ] + }, + { + "cell_type": "markdown", + "id": "6a4beb5d", + "metadata": {}, + "source": [ + "## Prepare data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12ba0b92", + "metadata": {}, + "outputs": [], + "source": [ + "import dsereader\n", + "\n", + "logdir = \"../kdse2023_log/\"\n", + "\n", + "appnames = [\"bipartite\",\n", + " \"Echo\",\n", + " \"fig8\",\n", + " \"H264\",\n", + " \"modem\",\n", + " # \"sample\",\n", + " \"satellite\",\n", + " \"BlackScholes\",\n", + " # \"example\",\n", + " \"h263decoder\",\n", + " \"JPEG2000\",\n", + " \"PDectect\",\n", + " \"samplerate\"]\n", + "\n", + "\n", + "methods = {\n", + " 2 : { \"name\" : \"OldKDSE\" , \"color\" : \"black\"}, # \"-aKPeriodicThroughputwithDSE\"\n", + " 3 : { \"name\" : \"DeepKDSE\" , \"color\" : \"red\"}, # \"-aDeepKPeriodicThroughputwithDSE\"\n", + " 4 : { \"name\" : \"DeepKDSEA\" , \"color\" : \"green\"}, # \"-aDeepKPeriodicThroughputwithDSE -papprox=1\"\n", + " # 5 : { \"name\" : \"KDSE\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=KDSE\"\n", + " # 6 : { \"name\" : \"K2DSE\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE\"\n", + " # 8 : { \"name\" : \"KDSE4\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=4\"\n", + " # 9 : { \"name\" : \"K2DSE4\" , \"color\" : \"black\"}, # \"-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE -pthread=4\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "628e0f64", + "metadata": {}, + "outputs": [], + "source": [ + "## Collect the max throughput for each application\n", + "import pandas as pd \n", + "\n", + "max_throughput = {}\n", + "for app in appnames :\n", + " for 
import math
import datetime


def time_in_msec(time_msec):
    """Format a duration given in milliseconds as a human-readable string.

    Args:
        time_msec: Duration in milliseconds (int or float). Any fractional
            millisecond is truncated for the formatted part, but the raw
            value is echoed unchanged between parentheses.

    Returns:
        A string such as ``"0days 0h 1min 40.000sec (100000.1)"``.
    """
    whole_msec = int(time_msec)
    delta = datetime.timedelta(milliseconds=whole_msec)
    hours, remainder = divmod(delta.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Modulo (not bitwise AND) extracts the milliseconds; they are zero-padded
    # so 5 ms reads ".005". delta.days is the full day count (weeks included).
    return "{}days {}h {}min {}.{:03d}sec ({})".format(
        delta.days, hours, minutes, seconds, whole_msec % 1000, time_msec
    )


time_in_msec(100000.10)


def gen_dsetable(logdir, graphs, methods):
    """Build the rows of the DSE summary table.

    Reads the notebook-level dictionaries ``max_throughput`` and ``task_count``
    (both keyed by graph name), which must have been filled in beforehand.

    Args:
        logdir: Directory handed to ``dsereader.load_app_dse``.
        graphs: Iterable of graph names.
        methods: Mapping ``method key -> {"name": ...}``.

    Returns:
        A list with one dict per (graph, method) holding the keys ``graph``,
        ``#task``, ``method``, ``#SD``, ``#Pareto``, ``Duration`` and
        ``Finished``. Pairs whose log is missing, or whose log has no
        ``throughput`` column, get ``"-"`` placeholders and
        ``Finished == False``.
    """
    rows = []
    for name in graphs:
        for m in methods:
            # Start from the "no data" row and fill it in as data is found.
            row = {
                "graph": name,
                "#task": task_count[name],
                "method": methods[m]["name"],
                "#SD": "-",
                "#Pareto": "-",
                "Duration": "-",
                "Finished": False,
            }
            rows.append(row)

            try:
                df = dsereader.load_app_dse(
                    logdir,
                    name,
                    m,
                    cols=["throughput", "storage distribution size", "cumulative duration"],
                )
            except FileNotFoundError:
                continue  # this method was not run for this graph
            if "throughput" not in df:
                continue

            max_th = df["throughput"].max()
            # The exploration is complete once it reached the known maximum throughput.
            finished = math.isclose(max_th, max_throughput[name], rel_tol=1e-5)

            row["#SD"] = df["storage distribution size"].count()
            row["Duration"] = time_in_msec(df["cumulative duration"].max())
            row["Finished"] = finished
            if finished:
                # The Pareto count is only reported for completed explorations.
                pareto = dsereader.extract_pareto(df)
                row["#Pareto"] = pareto["storage distribution size"].count()

    return rows
#!/bin/bash
# Run the kiter benchmarks for the K2DSE(A) project.
#
# Usage: kdse2023testbench.sh [MAX_TIME]
#   MAX_TIME  per-run time limit handed to timeout(1) (default: 60s).
#
# Every (configuration, benchmark) pair is executed once; its standard output
# is stored in ${LOG_DIR}/<graph>_<config_name>.txt, where <config_name> is the
# configuration string with every ' ', '=' and '-' replaced by '_'.
# All paths are relative to the current working directory.
#
# NOTE(review): kdse2023.ipynb opens "<app>_0.txt" / "<app>_1.txt", which does
# not match the names produced here -- confirm which naming scheme is current.

DEFAULT_MAX_TIME=60s
DEFAULT_LOG_DIR=./kdse2023_log/

BENCHMARKS=(
    "./benchmarks/sample.xml"
    "./benchmarks/sdf3mem/bipartite.xml"
    "./benchmarks/sdf3mem/buffercycle.xml"
    "./benchmarks/sdf3mem/example.xml"
    "./benchmarks/sdf3mem/fig8.xml"
    "./benchmarks/sdf3mem/h263decoder.xml"
    "./benchmarks/sdf3mem/modem.xml"
    "./benchmarks/sdf3mem/samplerate.xml"
    "./benchmarks/sdf3mem/satellite.xml"
    "./benchmarks/IB5CSDF/BlackScholes.xml"
    "./benchmarks/IB5CSDF/Echo.xml"
    "./benchmarks/IB5CSDF/H264.xml"
    "./benchmarks/IB5CSDF/JPEG2000.xml"
    "./benchmarks/IB5CSDF/PDectect.xml"
)

CONFIGS=(
    "-aPrintInfos"
    "-aKPeriodicThroughput -pDETAILS=1"
    "-aKPeriodicThroughputwithDSE"
    "-aDeepKPeriodicThroughputwithDSE"
    "-aDeepKPeriodicThroughputwithDSE -papprox=1"
    "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE"
    "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE"
    "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSEA"
    "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=2"
    "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=4"
    "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=8"
    "-athroughputbufferingDSE -prealtime=1 -pmode=KDSE -pthread=16"
    "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSE -pthread=8"
    "-athroughputbufferingDSE -prealtime=1 -pmode=K2DSEA -pthread=8"
)

KITER="./Release/bin/kiter"

# Fail early, before any benchmark runs, when the binary or an input is missing.
if [ ! -x "${KITER}" ]; then
    echo -ne "Error Kiter is missing: ${KITER} Not found.\n" >&2
    exit 1
fi

for f in "${BENCHMARKS[@]}"; do
    if [ ! -e "${f}" ]; then
        echo -ne "Error ${f} is missing.\n" >&2
        exit 1
    fi
done

# The optional first argument overrides the per-run time limit.
MAX_TIME=${DEFAULT_MAX_TIME}
if [ "$#" -ge 1 ]; then
    MAX_TIME=$1
fi

LOG_DIR=${DEFAULT_LOG_DIR}
mkdir -p "${LOG_DIR}"

# Only used to echo the command line; the real invocation below passes each
# argument separately so nothing depends on unquoted word splitting.
COMMAND_PREFIX="timeout --foreground ${MAX_TIME}"

for config_parameters in "${CONFIGS[@]}"; do
    # Same mapping as `tr " =-" "_"`: blanks, '=' and '-' all become '_'.
    config_name=${config_parameters//[ =-]/_}
    # Split the configuration string into individual kiter arguments.
    read -r -a config_args <<< "${config_parameters}"

    for graphfile in "${BENCHMARKS[@]}"; do
        graph=$(basename "${graphfile}" .xml)
        logfile="${LOG_DIR}/${graph}_${config_name}.txt"

        echo "${COMMAND_PREFIX} ${KITER} -f ${graphfile} ${config_parameters} > \"${logfile}\""
        timeout --foreground "${MAX_TIME}" "${KITER}" -f "${graphfile}" "${config_args[@]}" > "${logfile}"
        status=$?

        # A timeout is expected on large instances: report it and keep going.
        if [ "${status}" -eq 124 ]; then
            echo "Timed out after ${MAX_TIME}: ${logfile}" >&2
        elif [ "${status}" -ne 0 ]; then
            echo "kiter exited with status ${status}: ${logfile}" >&2
        fi
    done
done
import re


def calculate_step_sizes(df):
    """Return the observed step size of every feedback buffer.

    The step of a buffer is the gap between the two smallest distinct values it
    takes in the ``"feedback quantities"`` column (comma-separated integers),
    or 0 when the buffer never changes.
    """
    feedback_quantities = df["feedback quantities"].str.split(",", expand=True).astype(int)
    step_sizes = []
    for col in feedback_quantities:
        values = sorted(feedback_quantities[col].unique())
        step_sizes.append(int(values[1] - values[0]) if len(values) >= 2 else 0)
    return step_sizes


def split_list_in_dataframe(df, colname):
    """Replace the comma-separated column ``colname`` by one integer column per item.

    The new columns are named ``<colname>_0``, ``<colname>_1``, ... and are
    appended after the remaining columns. ``df`` itself is left untouched.
    """
    new_cols = df[colname].str.split(",", expand=True).astype(int)
    new_cols.columns = [f"{colname}_{x}" for x in range(len(new_cols.columns))]
    return df.drop(columns=[colname]).join(new_cols)


def compute_next(step_sizes):
    """Build a row function listing the configurations reachable from a row.

    For every 1-based buffer index found in ``row["critical feedback"]``, the
    returned function emits the row's ``"feedback quantities"`` with that
    buffer increased by its step size, formatted as a comma-separated string.
    """
    def inner_compute_next(row):
        criticals = [int(x) for x in row["critical feedback"].split(",") if x != ""]
        originals = [int(x) for x in row["feedback quantities"].split(",") if x != ""]
        successors = []
        for critical in criticals:
            buffer_idx = critical - 1  # the log uses 1-based buffer indices
            step = step_sizes[buffer_idx]
            if step <= 0:
                # The buffer never changes in this log: emitting the unchanged
                # configuration would only create a self-loop on the node.
                continue
            successor = originals.copy()
            successor[buffer_idx] += step
            successors.append(",".join(map(str, successor)))
        return successors
    return inner_compute_next


def turn_config_to_index(df):
    """Build a row function mapping ``"next configurations"`` strings to row labels.

    Configurations that do not appear in ``df["feedback quantities"]`` are
    dropped. When a configuration appears several times, its first row wins.
    """
    index_by_config = {}
    for idx, config in zip(df.index, df["feedback quantities"]):
        index_by_config.setdefault(config, idx)

    def inner_turn_config_to_index(row):
        return [
            index_by_config[config]
            for config in row["next configurations"]
            if config in index_by_config
        ]
    return inner_turn_config_to_index


def load_one_file(filename):
    """Load one DSE log (CSV) and derive the columns used by the graph view.

    Added columns:
        ``next configurations``: row labels of the logged successors of a row.
        ``past configurations``: row labels of the logged predecessors of a row.
        ``X`` / ``Y``: node coordinates (one ``Y`` level each time the storage
            distribution size increases, ``X`` spreads rows of equal size).
        ``interesting``: True when the row's throughput is strictly greater
            than that of all its predecessors (False without predecessors).
    """
    df = pd.read_csv(filename, dtype={"throughput": "float64",
                                      "storage distribution size": "int64",
                                      "feedback quantities": str,
                                      "critical feedback": str,
                                      })
    df = df[["throughput", "storage distribution size", "feedback quantities", "critical feedback"]].copy()
    df["critical feedback"] = df["critical feedback"].fillna("")

    # Given the step sizes and the critical feedback, we can compute the next
    # configurations, which is handy to generate the graph properly.
    step_sizes = calculate_step_sizes(df)
    df["next configurations"] = df.apply(compute_next(step_sizes), axis=1)
    df["next configurations"] = df.apply(turn_config_to_index(df), axis=1)

    # Invert the successor lists in one pass to get each row's predecessors.
    predecessors = {idx: [] for idx in df.index}
    for idx, successors in df["next configurations"].items():
        for successor in successors:
            if idx not in predecessors[successor]:
                predecessors[successor].append(idx)
    df["past configurations"] = [predecessors[idx] for idx in df.index]

    # Compute X and Y position for the visualization.
    df["Y"] = 50 * (df["storage distribution size"].diff() > 0).cumsum()
    df["X"] = 400 * df.groupby("storage distribution size").cumcount()

    throughput = df["throughput"]
    df["interesting"] = [
        bool(past) and bool(value > throughput.loc[past].max())
        for value, past in zip(throughput, df["past configurations"])
    ]
    return df


UNUSED_COLOR = '#97c2fc80'
USED_COLOR = '#97c2fc'
INTERESTING_COLOR = '#ff5733'


def generate_config_graph(df):
    """Build the pyvis network of explored configurations.

    ``df`` is expected to come from ``load_one_file`` (default integer index).
    Each row becomes a box node placed at (``X``, ``Y``), labelled with its
    storage distribution size and throughput and coloured according to
    ``interesting``; edges follow ``next configurations``.
    """
    step_sizes = calculate_step_sizes(df)
    print(step_sizes)

    feedback_quantities = df["feedback quantities"].str.split(",", expand=True).astype(int)

    # Only buffers that actually vary are shown in the node tooltip.
    col_of_interest = [x for x in feedback_quantities.columns if step_sizes[x] > 0]
    names_feedback_quantities = feedback_quantities[col_of_interest].apply(
        lambda x: ",".join(x.astype(str)), axis=1
    )

    net = Network(notebook=True)

    for i in range(len(df)):
        row = df.iloc[i]
        color = INTERESTING_COLOR if row["interesting"] else USED_COLOR
        net.add_node(i, physics=False,
                     x=int(row["X"]),
                     y=int(row["Y"]),
                     label=str(row["storage distribution size"]) + "(" + str(row["throughput"]) + ")",
                     title=f"{names_feedback_quantities.iloc[i]}\n{row}",
                     shape='box',
                     borderWidth=3 if i == 0 else 0,  # outline the first logged configuration
                     color=color,
                     )

    # Edges are added once all nodes exist.
    for i in range(len(df)):
        for j in df.iloc[i]["next configurations"]:
            net.add_edge(i, int(j), width=2)

    return net


# Index the available logs as kdse_logs[graph name][mode] = file name.
kdse_logs = {}
log_name_pattern = re.compile(r'.*/(.*)__athroughputbufferingDSE__prealtime_1__pmode_(.*)\.txt$')

for f in glob.glob('../kdse2023_log/*__athroughputbufferingDSE__prealtime_1__pmode_*.txt'):
    matching = log_name_pattern.match(f)
    if matching:
        name, method = matching.groups()
        kdse_logs.setdefault(name, {})[method] = f
kdse_logs
load_one_file(kdse_logs[\"fig8\"][\"KDSE\"])\n", + "net = generate_config_graph(df)\n", + "net.show(\"interactive_network.html\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df619572", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_one_file(kdse_logs[\"fig8\"][\"K2DSE\"])\n", + "net = generate_config_graph(df)\n", + "net.show(\"interactive_network.html\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01f23379", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_one_file(kdse_logs[\"fig8\"][\"K2DSEA\"])\n", + "net = generate_config_graph(df)\n", + "net.show(\"interactive_network.html\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68e9dc2a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}