add plot scripts

IsaacRe · Sep 26, 2024 · 2a8eed3 · 2a8eed3
1 parent 256ab96
commit 2a8eed3
Show file tree

Hide file tree

Showing 4 changed files with 476 additions and 0 deletions.
diff --git a/experiments/plot_benchmarks.py b/experiments/plot_benchmarks.py
@@ -0,0 +1,183 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+from argparse import ArgumentParser
+import os
+from matplotlib.lines import Line2D
+
+# Palette: matplotlib's default color cycle followed by NUM_COLORS evenly
+# spaced samples of the 'gist_rainbow' colormap, so that each plotted input
+# length can get its own color.
+NUM_COLORS = 12
+cm = plt.get_cmap('gist_rainbow')
+color_cycle = list(plt.rcParams['axes.prop_cycle'].by_key()['color'])
+# NOTE: new_colors is computed here, but the extension below always appends
+# NUM_COLORS entries regardless of its value.
+new_colors = NUM_COLORS - len(color_cycle)
+color_cycle += [cm(float(i) / NUM_COLORS) for i in range(NUM_COLORS)]
+# Compression rates used as x-axis ticks on the log-scaled plots.
+COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
+
+# Command-line interface.  The list defaults [-1] and [-1.0] act as
+# "option not given" sentinels and are checked for explicitly further down.
+parser = ArgumentParser()
+parser.add_argument("--file", type=str, default="out.csv",
+                    help="benchmark results CSV to read")
+parser.add_argument("--plot-input-lens", type=int, nargs="+", default=[-1],
+                    help="input lengths to plot (default: all)")
+parser.add_argument("--max-input-len", type=int, default=None,
+                    help="drop rows with a larger input length (default: "
+                         "largest length present for every compression rate)")
+parser.add_argument("--plot-cr-mult", type=float, nargs="+", default=[-1.0],
+                    help="compression rates whose throughput multiplier over "
+                         "the baseline is annotated (default: all)")
+parser.add_argument("--save-dir", type=str, default=None,
+                    help="directory for the figures (default: derived from --file)")
+parser.add_argument("--model", type=str, default=None,
+                    help="model name shown in plot titles")
+parser.add_argument("--gpu", type=str, default=None,
+                    help="GPU name shown in plot titles")
+args = parser.parse_args()
+
+# Directory that receives the figures.  When --save-dir is omitted it is the
+# CSV path without its extension.  os.path.splitext is used rather than
+# split('.')[0] so that a path containing other dots (e.g. "./runs/out.csv")
+# does not collapse to an empty or truncated directory name.
+save_dir = args.save_dir
+if save_dir is None:
+    save_dir = os.path.splitext(args.file)[0]
+os.makedirs(save_dir, exist_ok=True)
+
+# Load the benchmark rows and drop those without a req_per_s value.
+df = pd.read_csv(args.file)
+df = df[df.req_per_s.notna()]
+
+
+def _cache_prefix(value):
+    """Return the part of a max_cache_tokens entry before its first '-'."""
+    return value.split('-')[0]
+
+
+# Keep rows whose prefix is longer than one character, then derive the rate:
+# 'full' becomes -1 (the uncompressed baseline), anything else is the prefix
+# without its last character parsed as an integer.
+df = df[df.max_cache_tokens.apply(lambda x: _cache_prefix(x)[:-1] != '')]
+df['compression_rate'] = df.max_cache_tokens.apply(
+    lambda x: -1 if _cache_prefix(x) == 'full' else int(_cache_prefix(x)[:-1]))
+df['row_id'] = ['_'.join((str(i), str(cr)))
+                for i, cr in zip(df.input_len, df.compression_rate)]
+
+# average runs with same compression rate
+# (rows sharing input_len and compression_rate collapse into one: tok_per_s
+# is averaged, the other kept columns take their maximum)
+aggregations = {'input_len': 'max', 'compression_rate': 'max',
+                'max_batch_size': 'max', 'tok_per_s': 'mean'}
+df = df.groupby('row_id').agg(aggregations)
+print(df)
+
+# Largest input length that is present for every compression rate, unless the
+# user supplied --max-input-len.  Rows with a longer input are dropped.
+longest_per_rate = df.groupby('compression_rate')['input_len'].max()
+max_shared_input_len = (longest_per_rate.min() if args.max_input_len is None
+                        else args.max_input_len)
+df = df[df.input_len <= max_shared_input_len]
+
+max_compression_rate = df.compression_rate.max()
+input_lengths = df.input_len.unique()
+# -1 marks the uncompressed baseline and is excluded from the real rates.
+is_compressed = df.compression_rate != -1
+compression_rates = df.compression_rate[is_compressed].unique()
+# [-1.0] is the "not given" sentinel: annotate every rate found in the data.
+if args.plot_cr_mult == [-1.0]:
+    args.plot_cr_mult = compression_rates
+
+fig, ax = plt.subplots()
+
+# throughput by max compression rate
+assert len(input_lengths) <= len(color_cycle), f'{len(input_lengths)} > {len(color_cycle)}'
+
+# For every requested compression rate, pick the row with the highest
+# throughput and, if a baseline row (compression_rate == -1) exists for the
+# same input length, remember the speed-up over that baseline.
+mults = []
+mult_crs = []
+mult_baselines = []
+for cr in sorted(args.plot_cr_mult):
+    df_ = df[df.compression_rate == cr]
+    if args.plot_input_lens != [-1]:
+        df_ = df_[df_.input_len.isin(args.plot_input_lens)]
+    best_rows = df_[df_.tok_per_s == df_.tok_per_s.max()]
+    if best_rows.empty:
+        # No usable measurement for this rate (rate absent from the data, all
+        # rows filtered out, or only NaN throughput): skip it instead of
+        # failing on .iloc[0] of an empty frame.
+        continue
+    max_row = best_rows.iloc[0]
+    baseline = df[(df.input_len == max_row.input_len) & (df.compression_rate == -1)]
+    if len(baseline) > 0:
+        baseline = baseline.iloc[0]
+        mult = max_row.tok_per_s / baseline.tok_per_s
+        mults.append(mult)
+        mult_crs.append(max_row)
+        mult_baselines.append(baseline)
+
+# One throughput curve per input length, baseline rows excluded.
+for c, input_len in zip(color_cycle, sorted(input_lengths)):
+    if args.plot_input_lens != [-1] and input_len not in args.plot_input_lens:
+        continue
+    df_ = df[(df.input_len == input_len) & (df.compression_rate != -1)]
+    df_ = df_.sort_values('compression_rate')
+    plt.plot(df_.compression_rate, df_.tok_per_s, label=input_len, c=c, alpha=0.7)
+
+# plot thrpt multiplier
+# A thin black vertical segment from the baseline to the best throughput,
+# drawn slightly to the right of the rate (x * 1.05), with the multiplier
+# written at mid-height.
+for max_mult, max_mult_cr, max_mult_baseline in zip(mults, mult_crs, mult_baselines):
+    ax.plot([max_mult_cr.compression_rate * 1.05] * 2,
+            [max_mult_baseline.tok_per_s, max_mult_cr.tok_per_s],
+            linewidth=1,
+            c='black')
+    ax.text(max_mult_cr.compression_rate,
+            max_mult_baseline.tok_per_s + (max_mult_cr.tok_per_s - max_mult_baseline.tok_per_s) / 2,
+            s='%.2fx' % max_mult,
+            fontsize=10,
+            horizontalalignment='right')
+
+# Remember the current x-range so the dashed baselines span exactly the view
+# and the limits can be restored after the ticks are changed below.
+xmin, xmax = ax.get_xlim()
+
+# Dashed horizontal line per input length at its baseline throughput.
+for c, input_len in zip(color_cycle, sorted(input_lengths)):
+    if args.plot_input_lens != [-1] and input_len not in args.plot_input_lens:
+        continue
+    baseline = df[(df.input_len == input_len) & (df.compression_rate == -1)]
+    if len(baseline) > 0:
+        baseline = baseline.iloc[0]
+        ax.plot([xmin, xmax], [baseline.tok_per_s] * 2, c=c, linestyle='--', linewidth=1, alpha=0.6)
+
+# The baseline lines carry no label, so a single black dashed proxy entry is
+# appended to the legend for them.
+legend_handles, _ = ax.get_legend_handles_labels()
+ax.legend(handles=legend_handles + [Line2D([0], [0], color='black', ls='--', label='vanilla vLLM')])
+ax.set_xscale('log')
+ax.set_xticks(COMPRESSION_RATES)
+ax.set_xticklabels(COMPRESSION_RATES)
+ax.set_xlim(xmin, xmax)
+ax.grid()
+ax.set_title(f"{args.gpu} throughput for varied input length")
+ax.set_xlabel("compression rate")
+ax.set_ylabel("throughput (tok/sec)")
+# NOTE(review): the file name is spelled 'throughtput'; left unchanged here in
+# case other tooling refers to these files by name.
+plt.savefig(f'{save_dir}/throughtput_by_cr.jpg')
+plt.savefig(f'{save_dir}/throughtput_by_cr.pdf')
+plt.show()
+plt.clf()
+
+# throughput by input length
+# One curve per compression rate, plus a black dashed curve for the baseline
+# rows (compression_rate == -1).
+for compression_rate in sorted(compression_rates):
+    df_ = df[df.compression_rate == compression_rate].sort_values('input_len')
+    df_ = df_[df_.input_len <= max_shared_input_len]
+    plt.plot(df_.input_len, df_.tok_per_s, label=f'{compression_rate}x', alpha=0.7)
+df_ = df[df.compression_rate == -1].sort_values('input_len')
+plt.plot(df_.input_len, df_.tok_per_s, label='vanilla vLLM', linestyle='--', c='black')
+plt.legend()
+plt.grid()
+plt.title(f"{args.gpu} - {args.model}")
+# Axis labels are set before saving; setting them afterwards would leave the
+# files on disk without labels.
+plt.xlabel('input length')
+plt.ylabel('throughput (tok/sec)')
+plt.savefig(f'{save_dir}/throughput_by_len.pdf')
+plt.savefig(f'{save_dir}/throughput_by_len.jpg')
+plt.show()
+plt.clf()
+
+fig, ax = plt.subplots()
+
+# max decoding batch by compression rate
+# One curve per input length, baseline rows (compression_rate == -1) excluded.
+for c, input_len in zip(color_cycle, sorted(input_lengths)):
+    if args.plot_input_lens != [-1] and input_len not in args.plot_input_lens:
+        continue
+    df_ = df[(df.input_len == input_len) & (df.compression_rate != -1)]
+    df_ = df_.sort_values('compression_rate')
+    ax.plot(df_.compression_rate, df_.max_batch_size, label=input_len, alpha=0.7, c=c)
+
+# Remember the x-range so the dashed baselines span exactly the current view.
+xmin, xmax = ax.get_xlim()
+for c, input_len in zip(color_cycle, sorted(input_lengths)):
+    # [-1] means "plot every input length".  The sentinel has to be checked
+    # here as well, otherwise no baseline is drawn with the default arguments.
+    if args.plot_input_lens != [-1] and input_len not in args.plot_input_lens:
+        continue
+    df_ = df[(df.input_len == input_len) & (df.compression_rate == -1)]
+    if len(df_) > 0:
+        df_ = df_.iloc[0]
+        ax.plot([xmin, xmax], [df_.max_batch_size] * 2, c=c, linestyle='--',
+                 linewidth=1, alpha=0.6)
+
+# The baseline lines carry no label, so a single black dashed proxy entry is
+# appended to the legend for them.
+legend_handles, _ = ax.get_legend_handles_labels()
+ax.legend(handles=legend_handles + [Line2D([0], [0], color='black', ls='--', label='vanilla vLLM')])
+
+ax.set_xscale('log')
+ax.set_xticks(COMPRESSION_RATES)
+ax.set_xticklabels(COMPRESSION_RATES)
+ax.set_xlim(xmin, xmax)
+ax.set_title(f"{args.gpu} - {args.model}")
+ax.set_xlabel('compression rate')
+ax.set_ylabel('batch size')
+ax.grid()
+plt.savefig(f'{save_dir}/max_batch_by_cr.pdf')
+plt.savefig(f'{save_dir}/max_batch_by_cr.jpg')
+plt.show()
+plt.clf()
+
+# max decoding batch by input length
+# One curve per compression rate; the baseline rows (compression_rate == -1)
+# are added as a black dashed curve when any exist.
+for rate in sorted(compression_rates):
+    per_rate = df[df.compression_rate == rate].sort_values('input_len')
+    plt.plot(per_rate.input_len, per_rate.max_batch_size, label=f'{rate}x', alpha=0.7)
+vanilla = df[df.compression_rate == -1].sort_values('input_len')
+if len(vanilla):
+    plt.plot(vanilla.input_len, vanilla.max_batch_size, c='black', linestyle='--', label='vanilla vLLM')
+plt.title('Max decoding batch by input length')
+plt.xlabel('input length')
+plt.ylabel('batch size')
+plt.legend()
+plt.grid()
+plt.savefig(f'{save_dir}/max_batch_by_len.pdf')
+plt.savefig(f'{save_dir}/max_batch_by_len.jpg')
+plt.show()
diff --git a/experiments/plot_longbench.py b/experiments/plot_longbench.py
@@ -0,0 +1,106 @@
+import matplotlib.pyplot as plt
+import json
+from argparse import ArgumentParser
+
+color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
+# x positions of the plotted points; 1.0 is the uncompressed reference point.
+COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
+# Key fragments used to look up each compressed run in the results JSON.
+COMPRESSION_RATE_STRINGS = ['2.0x', '4.0x', '8.0x', '16.0x', '32.0x', '64.0x']
+# Default selection for --subsets: one subtask from each category below.
+ONE_SUBTASK_PER_CATEGORY = ['narrativeqa', 'hotpotqa', 'gov_report', 'trec',
+                            'passage_retrieval_en', 'lcc']
+# Subtask name -> category name.
+SUBTASK_CATEGORIES = {
+    "narrativeqa": "Single-Doc. QA",
+    "qasper": "Single-Doc. QA",
+    "multifieldqa_en": "Single-Doc. QA",
+    "hotpotqa": "Multi-Doc. QA",
+    "2wikimqa": "Multi-Doc. QA",
+    "musique": "Multi-Doc. QA",
+    "gov_report": "Summarization",
+    "qmsum": "Summarization",
+    "multi_news": "Summarization",
+    "trec": "Few-shot Learning",
+    "triviaqa": "Few-shot Learning",
+    "samsum": "Few-shot Learning",
+    "passage_count": "Synthetic",
+    "passage_retrieval_en": "Synthetic",
+    "lcc": "Code",
+    "repobench-p": "Code",
+}
+# Subtask names, one per line of datasets.txt.  Read into a list inside a
+# `with` block so the file handle is closed and the names can be iterated
+# more than once (a bare map() object is exhausted after one pass).
+with open('datasets.txt', 'r') as datasets_file:
+    ALL_SUBTASKS = [line.strip() for line in datasets_file]
+
+# Command-line interface.
+parser = ArgumentParser()
+parser.add_argument("--file", type=str, default="result.json",
+                    help="JSON file with the scores of the compressed runs")
+parser.add_argument("--full-file", type=str, default=None,
+                    help="JSON file with the baseline ('full') scores "
+                         "(default: same as --file)")
+parser.add_argument("--exp-id", type=str, default="w32_L2_cc",
+                    help="experiment id suffix of the compressed-run result keys")
+parser.add_argument("--full-exp-id", type=str, default="w32_L2_cc",
+                    help="experiment id suffix of the baseline result keys")
+parser.add_argument("--subsets", type=str, nargs="+",
+                    default=ONE_SUBTASK_PER_CATEGORY,
+                    help="subtasks to plot, or 'one-per-category' / 'all'")
+parser.add_argument("--by-category", action="store_true",
+                    help="average the subtasks of each category into one curve")
+parser.add_argument("--exclude-category", type=str, nargs="*", default=[],
+                    help="categories to leave out when --by-category is set")
+parser.add_argument("--save-dir", type=str, default="./",
+                    help="directory the figures are written to")
+args = parser.parse_args()
+
+
+
+# The baseline results default to the same file as the compressed runs.
+if args.full_file is None:
+    args.full_file = args.file
+
+# NOTE(review): the parser in this file gives --full-exp-id a non-None
+# default, so this fallback is currently never taken.
+if args.full_exp_id is None:
+    args.full_exp_id = args.exp_id
+
+print(args.full_exp_id)
+
+# Expand the two symbolic --subsets values into concrete subtask lists.
+if args.subsets == ['one-per-category']:
+    args.subsets = ONE_SUBTASK_PER_CATEGORY
+elif args.subsets == ['all']:
+    args.subsets = ALL_SUBTASKS
+
+# Load both score tables; `with` closes the files once they are parsed.
+with open(args.file, 'r') as results_file:
+    results = json.load(results_file)
+with open(args.full_file, 'r') as full_results_file:
+    full_results = json.load(full_results_file)
+
+# Errors that mean "this subtask cannot be plotted": unknown subtask or missing
+# result key (KeyError), non-numeric score (TypeError), baseline score of zero
+# (ZeroDivisionError).  Such subtasks are skipped with a message; anything
+# else, including Ctrl-C, is no longer silently swallowed.
+_SKIP_ERRORS = (KeyError, TypeError, ZeroDivisionError)
+
+# Scores are plotted relative to the baseline score (baseline = 100), with the
+# baseline itself placed at compression rate 1.0.
+if args.by_category:
+    # Per category: element-wise sum of the relative scores of its subtasks,
+    # and the number of subtasks that contributed to that sum.
+    category_results = {}
+    category_subtask_cnt = {}
+    for dset in args.subsets:
+        try:
+            category = SUBTASK_CATEGORIES[dset]
+            if category in args.exclude_category:
+                continue
+            full_score = full_results[f'{dset}-full_{args.full_exp_id}']
+            dset_results = [results[f'{dset}-{cr}_{args.exp_id}'] / full_score * 100.
+                            for cr in COMPRESSION_RATE_STRINGS]
+        except _SKIP_ERRORS as err:
+            print(f'skipping {dset}: {err!r}')
+            continue
+        running_sum = category_results.get(category, [0] * len(COMPRESSION_RATE_STRINGS))
+        category_results[category] = [acc + dr for acc, dr in zip(running_sum, dset_results)]
+        category_subtask_cnt[category] = category_subtask_cnt.get(category, 0) + 1
+    for c, r in category_results.items():
+        # Use a separate name so the loaded `results` dict is not overwritten.
+        category_scores = [100.0] + [r_ / category_subtask_cnt[c] for r_ in r]
+        plt.plot(COMPRESSION_RATES, category_scores, label=c)
+else:
+    for dset in args.subsets:
+        try:
+            full_score = full_results[f'{dset}-full_{args.full_exp_id}']
+            dset_results = ([100.] +
+                    [results[f'{dset}-{cr}_{args.exp_id}'] / full_score * 100.
+                        for cr in COMPRESSION_RATE_STRINGS])
+        except _SKIP_ERRORS as err:
+            print(f'skipping {dset}: {err!r}')
+            continue
+        plt.plot(COMPRESSION_RATES, dset_results, label=dset)
+
+# Cap the upper y-limit at 107 while keeping the automatic lower limit.
+ymin, ymax = plt.ylim()
+plt.ylim(ymin, min(ymax, 107.0))
+
+plt.title("LongBench subtask performance")
+plt.ylabel("% performance")
+# The log scale is applied before the ticks are set.
+plt.xscale('log')
+plt.xticks(COMPRESSION_RATES, labels=COMPRESSION_RATES)
+plt.xlabel("compression rate")
+plt.grid()
+plt.legend()
+plt.savefig(f'{args.save_dir}/longbench_score_by_cr.jpg')
+plt.savefig(f'{args.save_dir}/longbench_score_by_cr.pdf')
+plt.show()
diff --git a/experiments/plot_longbench_categories.py b/experiments/plot_longbench_categories.py
@@ -0,0 +1,102 @@
+import matplotlib.pyplot as plt
+import json
+from argparse import ArgumentParser
+
+color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
+# x positions of the plotted points; 1.0 is the uncompressed reference point.
+COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
+# Key fragments used to look up each compressed run in the results JSON.
+COMPRESSION_RATE_STRINGS = ['2.0x', '4.0x', '8.0x', '16.0x', '32.0x', '64.0x']
+ONE_SUBTASK_PER_CATEGORY = ['narrativeqa', 'hotpotqa', 'gov_report', 'trec',
+                            'passage_retrieval_en', 'lcc']
+# Subtask name -> category name.
+SUBTASK_CATEGORIES = {
+    "narrativeqa": "Single-Doc. QA",
+    "qasper": "Single-Doc. QA",
+    "multifieldqa_en": "Single-Doc. QA",
+    "hotpotqa": "Multi-Doc. QA",
+    "2wikimqa": "Multi-Doc. QA",
+    "musique": "Multi-Doc. QA",
+    "gov_report": "Summarization",
+    "qmsum": "Summarization",
+    "multi_news": "Summarization",
+    "trec": "Few-shot Learning",
+    "triviaqa": "Few-shot Learning",
+    "samsum": "Few-shot Learning",
+    "passage_count": "Synthetic",
+    "passage_retrieval_en": "Synthetic",
+    "lcc": "Code",
+    "repobench-p": "Code",
+}
+# Subtask names, one per line of datasets.txt.  Read into a list inside a
+# `with` block so the file handle is closed and the names can be iterated
+# more than once (a bare map() object is exhausted after one pass).
+with open('datasets.txt', 'r') as datasets_file:
+    ALL_SUBTASKS = [line.strip() for line in datasets_file]
+# Categories in the order they fill the 2x3 grid of subplots (row by row).
+ALL_CATEGORIES = ["Single-Doc. QA", "Multi-Doc. QA", "Summarization",
+                  "Few-shot Learning", "Synthetic", "Code"]
+
+# Command-line interface.
+parser = ArgumentParser()
+parser.add_argument("--file", type=str, default="result.json",
+                    help="JSON file with the scores of the compressed runs")
+parser.add_argument("--full-file", type=str, default=None,
+                    help="JSON file with the baseline ('full') scores "
+                         "(default: same as --file)")
+parser.add_argument("--exp-id", type=str, default="w32_L2_cc",
+                    help="experiment id suffix of the compressed-run result keys")
+parser.add_argument("--full-exp-id", type=str, default="w32_L2_cc",
+                    help="experiment id suffix of the baseline result keys")
+parser.add_argument("--save-dir", type=str, default="./",
+                    help="directory the figures are written to")
+parser.add_argument("--ylim", type=str, nargs=2, default=("None", "None"),
+                    help="lower and upper y-limit; 'none' (any case) keeps "
+                         "the automatically chosen value")
+args = parser.parse_args()
+
+# Each --ylim entry is either the word "none" (no override) or a number.
+set_ymin, set_ymax = (None if bound.lower() == "none" else float(bound)
+                      for bound in args.ylim)
+
+# The baseline results default to the same file as the compressed runs.
+if args.full_file is None:
+    args.full_file = args.file
+
+# NOTE(review): the parser in this file gives --full-exp-id a non-None
+# default, so this fallback is currently never taken.
+if args.full_exp_id is None:
+    args.full_exp_id = args.exp_id
+
+print(args.full_exp_id)
+
+# Load both score tables; `with` closes the files once they are parsed.
+with open(args.file, 'r') as results_file:
+    results = json.load(results_file)
+with open(args.full_file, 'r') as full_results_file:
+    full_results = json.load(full_results_file)
+
+# One subplot per category, laid out row by row on a 2x3 grid.
+fig, ax = plt.subplots(2, 3, figsize=(12, 7))
+
+
+for i, category in enumerate(ALL_CATEGORIES):
+    x, y = i // 3, i % 3  # x: grid row, y: grid column
+    ax_ = ax[x, y]
+    # For the "Code" category of a results file whose path contains "70b" the
+    # raw scores are plotted; everywhere else scores are shown relative to
+    # the baseline score (baseline = 100).
+    plot_absolute = category == "Code" and "70b" in args.file
+    for dset, dset_cat in SUBTASK_CATEGORIES.items():
+        if dset_cat != category:
+            continue
+        try:
+            full_score = full_results[f'{dset}-full_{args.full_exp_id}']
+            scores = [results[f'{dset}-{cr}_{args.exp_id}']
+                      for cr in COMPRESSION_RATE_STRINGS]
+            if plot_absolute:
+                dset_results = [full_score] + scores
+            else:
+                dset_results = [100.] + [s / full_score * 100. for s in scores]
+            # Plotting stays inside the try block because raw scores are not
+            # validated before they reach matplotlib.
+            ax_.plot(COMPRESSION_RATES, dset_results, label=dset)
+        except (KeyError, TypeError, ValueError, ZeroDivisionError) as err:
+            # Missing or unusable results: skip this subtask but report it.
+            # Other exceptions, including Ctrl-C, are no longer swallowed.
+            print(f'skipping {dset}: {err!r}')
+
+    # Automatic limits with the top capped at 107, overridden by --ylim.
+    ymin, ymax = ax_.get_ylim()
+    ymax = min(ymax, 107.0)
+    if set_ymin is not None:
+        ymin = set_ymin
+    if set_ymax is not None:
+        ymax = set_ymax
+    if plot_absolute:
+        # Raw scores keep matplotlib's automatic y-range.
+        ax_.set_ylabel("absolute performance")
+    else:
+        ax_.set_ylim(ymin, ymax)
+        if y == 0:
+            ax_.set_ylabel("% performance")
+    ax_.legend(loc='lower left')
+    ax_.set_title(category)
+    ax_.set_xscale('log')
+    ax_.set_xticks(COMPRESSION_RATES)
+    ax_.set_xticklabels(COMPRESSION_RATES)
+    if x == 1:
+        ax_.set_xlabel("compression rate")
+    ax_.grid()
+
+
+plt.savefig(f'{args.save_dir}/longbench_score_by_cr_all.jpg')
+plt.savefig(f'{args.save_dir}/longbench_score_by_cr_all.pdf')
+plt.show()