Skip to content

Commit

Permalink
add plot scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
IsaacRe committed Sep 26, 2024
1 parent 256ab96 commit 2a8eed3
Show file tree
Hide file tree
Showing 4 changed files with 476 additions and 0 deletions.
183 changes: 183 additions & 0 deletions experiments/plot_benchmarks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
import matplotlib.pyplot as plt
import pandas as pd
from argparse import ArgumentParser
import os
from matplotlib.lines import Line2D

# Palette used to give every input length its own line colour: matplotlib's
# current property-cycle colours followed by NUM_COLORS evenly spaced samples
# of the 'gist_rainbow' colormap.
NUM_COLORS = 12
cm = plt.get_cmap('gist_rainbow')
color_cycle = list(plt.rcParams['axes.prop_cycle'].by_key()['color'])
# NOTE(review): computed but not read anywhere in this script; the extension
# below always appends NUM_COLORS samples regardless of this value.
new_colors = NUM_COLORS - len(color_cycle)
color_cycle += [cm(float(idx) / NUM_COLORS) for idx in range(NUM_COLORS)]

# Positions (and labels) of the x-axis ticks on the log-scaled
# compression-rate axes.
COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]

# Command-line interface.  For --plot-input-lens and --plot-cr-mult the
# single-element defaults ([-1] / [-1.0]) act as sentinels that the plotting
# code compares against to detect "no explicit selection".
parser = ArgumentParser()
for flag, options in (
    ("--file", dict(type=str, default="out.csv")),
    ("--plot-input-lens", dict(type=int, nargs="+", default=[-1])),
    ("--max-input-len", dict(type=int, default=None)),
    ("--plot-cr-mult", dict(type=float, nargs="+", default=[-1.0])),
    ("--save-dir", dict(type=str, default=None)),
    ("--model", dict(type=str, default=None)),
    ("--gpu", dict(type=str, default=None)),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()

# Directory that receives the rendered figures.  When --save-dir is omitted it
# is derived from the input file name with only the extension removed.
# os.path.splitext is used rather than splitting on the first '.', which
# produced an empty directory name for paths such as './out.csv' (making
# os.makedirs fail) and cut names containing additional dots short.
save_dir = args.save_dir
if save_dir is None:
    save_dir = os.path.splitext(args.file)[0]
os.makedirs(save_dir, exist_ok=True)

# Load the benchmark results and keep only rows that report a request rate.
df = pd.read_csv(args.file)
df = df[df.req_per_s.notna()]

# Only the part of `max_cache_tokens` before the first '-' is used.  It is
# either the literal 'full' (encoded as compression rate -1) or an integer
# followed by one trailing character, which is dropped before conversion.
# Rows whose prefix is empty once that character is dropped are discarded.
cache_prefix = df.max_cache_tokens.apply(lambda value: value.split('-')[0])
has_rate = cache_prefix.apply(lambda prefix: prefix[:-1] != '')
# .copy() makes the filtered frame independent of the one it was sliced from,
# so adding columns below does not raise pandas' SettingWithCopyWarning.
df = df[has_rate].copy()
df['compression_rate'] = cache_prefix[has_rate].apply(
    lambda prefix: -1 if prefix == 'full' else int(prefix[:-1]))
df['row_id'] = [f'{input_len}_{cr}'
                for input_len, cr in zip(df.input_len, df.compression_rate)]

# Collapse repeated runs of the same (input length, compression rate) pair:
# throughput is averaged, the other columns take their maximum.
df = df.groupby('row_id').agg({
    'input_len': 'max',
    'compression_rate': 'max',
    'max_batch_size': 'max',
    'tok_per_s': 'mean',
})
print(df)

# Largest input length for which every compression rate has a measurement,
# unless --max-input-len overrides it.  Longer inputs are dropped.
max_shared_input_len = df.groupby('compression_rate').input_len.max().min()
if args.max_input_len is not None:
    max_shared_input_len = args.max_input_len
df = df[df.input_len <= max_shared_input_len]

input_lengths = df.input_len.unique()
# -1 marks the uncompressed baseline and is therefore not a "rate".
compression_rates = df.compression_rate[df.compression_rate != -1].unique()
# [-1.0] is the parser default: annotate every available compression rate.
if args.plot_cr_mult == [-1.0]:
    args.plot_cr_mult = compression_rates

fig, ax = plt.subplots()

# ---- Throughput by compression rate -------------------------------------
# One solid line per input length, a dashed horizontal line at the throughput
# of the matching compression_rate == -1 row (labelled 'vanilla vLLM'), and a
# "<n>x" annotation comparing the best row of each requested compression rate
# with that baseline.
assert len(input_lengths) <= len(color_cycle), f'{len(input_lengths)} > {len(color_cycle)}'

# [-1] is the parser default and means "plot every input length".
show_all_input_lens = args.plot_input_lens == [-1]
# Colours are assigned by position in the sorted list *before* filtering, so
# an input length keeps its colour whatever subset is selected.
selected_input_lens = [
    (c, input_len)
    for c, input_len in zip(color_cycle, sorted(input_lengths))
    if show_all_input_lens or input_len in args.plot_input_lens
]

# For each requested compression rate find the row with the highest
# throughput and the baseline row with the same input length.
mults = []
mult_crs = []
mult_baselines = []
for cr in sorted(args.plot_cr_mult):
    df_ = df[df.compression_rate == cr]
    if not show_all_input_lens:
        df_ = df_[df_.input_len.isin(args.plot_input_lens)]
    best_rows = df_[df_.tok_per_s == df_.tok_per_s.max()]
    if best_rows.empty:
        # Nothing measured for this rate / input-length selection; skip the
        # annotation instead of failing on .iloc[0].
        continue
    max_row = best_rows.iloc[0]
    baseline = df[(df.input_len == max_row.input_len) & (df.compression_rate == -1)]
    if len(baseline) > 0:
        baseline = baseline.iloc[0]
        mults.append(max_row.tok_per_s / baseline.tok_per_s)
        mult_crs.append(max_row)
        mult_baselines.append(baseline)

for c, input_len in selected_input_lens:
    df_ = df[(df.input_len == input_len) & (df.compression_rate != -1)]
    df_ = df_.sort_values('compression_rate')
    plt.plot(df_.compression_rate, df_.tok_per_s, label=input_len, c=c, alpha=0.7)

# Throughput multiplier: a thin vertical bar from baseline to best throughput,
# drawn slightly to the right of the data point, with the ratio printed at
# mid-height and right-aligned on the data point's x position.
for max_mult, max_mult_cr, max_mult_baseline in zip(mults, mult_crs, mult_baselines):
    ax.plot([max_mult_cr.compression_rate * 1.05] * 2,
            [max_mult_baseline.tok_per_s, max_mult_cr.tok_per_s],
            linewidth=1,
            c='black')
    ax.text(max_mult_cr.compression_rate,
            max_mult_baseline.tok_per_s + (max_mult_cr.tok_per_s - max_mult_baseline.tok_per_s) / 2,
            s='%.2fx' % max_mult,
            fontsize=10,
            horizontalalignment='right')

# Remember the x-range now: the baselines are drawn across exactly this range
# and it is restored after the ticks are replaced below.
xmin, xmax = ax.get_xlim()

for c, input_len in selected_input_lens:
    baseline = df[(df.input_len == input_len) & (df.compression_rate == -1)]
    if len(baseline) > 0:
        baseline = baseline.iloc[0]
        ax.plot([xmin, xmax], [baseline.tok_per_s] * 2, c=c, linestyle='--', linewidth=1, alpha=0.6)

# The dashed baselines carry no label, so one black dashed proxy entry is
# appended to the legend to explain them.
legend_handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=legend_handles + [Line2D([0], [0], color='black', ls='--', label='vanilla vLLM')])
ax.set_xscale('log')
ax.set_xticks(COMPRESSION_RATES)
ax.set_xticklabels(COMPRESSION_RATES)
ax.set_xlim(xmin, xmax)
ax.grid()
ax.set_title(f"{args.gpu} throughput for varied input length")
ax.set_xlabel("compression rate")
ax.set_ylabel("throughput (tok/sec)")
# NOTE(review): the file names are spelled 'throughtput'; kept unchanged so
# existing references to these output files keep working.
plt.savefig(f'{save_dir}/throughtput_by_cr.jpg')
plt.savefig(f'{save_dir}/throughtput_by_cr.pdf')
plt.show()
plt.clf()

# ---- Throughput by input length ------------------------------------------
# One line per compression rate, plus a black dashed line for the
# compression_rate == -1 rows (labelled 'vanilla vLLM').
for compression_rate in sorted(compression_rates):
    df_ = df[df.compression_rate == compression_rate].sort_values('input_len')
    df_ = df_[df_.input_len <= max_shared_input_len]
    plt.plot(df_.input_len, df_.tok_per_s, label=f'{compression_rate}x', alpha=0.7)
df_ = df[df.compression_rate == -1].sort_values('input_len')
plt.plot(df_.input_len, df_.tok_per_s, label='vanilla vLLM', linestyle='--', c='black')
plt.legend()
plt.grid()
plt.title(f"{args.gpu} - {args.model}")
# Axis labels must be set before saving; otherwise the files written below
# contain an unlabelled figure and only the interactive window shows them.
plt.xlabel('input length')
plt.ylabel('throughput (tok/sec)')
plt.savefig(f'{save_dir}/throughput_by_len.pdf')
plt.savefig(f'{save_dir}/throughput_by_len.jpg')
plt.show()
plt.clf()

fig, ax = plt.subplots()

# ---- Max decoding batch size by compression rate --------------------------
# One solid line per input length and a dashed horizontal line at the batch
# size of the matching compression_rate == -1 row (labelled 'vanilla vLLM').

# [-1] is the parser default and means "plot every input length".  The same
# selection is used for the solid lines and for the baselines; previously the
# baseline loop ignored the sentinel and therefore drew nothing by default.
show_all_input_lens = args.plot_input_lens == [-1]
# Colours are assigned by position in the sorted list *before* filtering, so
# an input length keeps its colour whatever subset is selected.
selected_input_lens = [
    (c, input_len)
    for c, input_len in zip(color_cycle, sorted(input_lengths))
    if show_all_input_lens or input_len in args.plot_input_lens
]

for c, input_len in selected_input_lens:
    df_ = df[(df.input_len == input_len) & (df.compression_rate != -1)]
    df_ = df_.sort_values('compression_rate')
    ax.plot(df_.compression_rate, df_.max_batch_size, label=input_len, alpha=0.7, c=c)

# Remember the x-range now: the baselines are drawn across exactly this range
# and it is restored after the ticks are replaced below.
xmin, xmax = ax.get_xlim()
for c, input_len in selected_input_lens:
    df_ = df[(df.input_len == input_len) & (df.compression_rate == -1)]
    if len(df_) > 0:
        df_ = df_.iloc[0]
        ax.plot([xmin, xmax], [df_.max_batch_size] * 2, c=c, linestyle='--',
                linewidth=1, alpha=0.6)

# The dashed baselines carry no label, so one black dashed proxy entry is
# appended to the legend to explain them.
legend_handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=legend_handles + [Line2D([0], [0], color='black', ls='--', label='vanilla vLLM')])

ax.set_xscale('log')
ax.set_xticks(COMPRESSION_RATES)
ax.set_xticklabels(COMPRESSION_RATES)
ax.set_xlim(xmin, xmax)
ax.set_title(f"{args.gpu} - {args.model}")
ax.set_xlabel('compression rate')
ax.set_ylabel('batch size')
ax.grid()
plt.savefig(f'{save_dir}/max_batch_by_cr.pdf')
plt.savefig(f'{save_dir}/max_batch_by_cr.jpg')
plt.show()
plt.clf()

# ---- Max decoding batch size by input length ------------------------------
# One line per compression rate; the compression_rate == -1 rows, when there
# are any, are added as a black dashed line labelled 'vanilla vLLM'.
for rate in sorted(compression_rates):
    per_rate = df[df.compression_rate == rate]
    per_rate = per_rate.sort_values('input_len')
    plt.plot(per_rate.input_len, per_rate.max_batch_size, label=f'{rate}x', alpha=0.7)

vanilla = df[df.compression_rate == -1].sort_values('input_len')
if len(vanilla):
    plt.plot(vanilla.input_len, vanilla.max_batch_size, c='black', linestyle='--', label='vanilla vLLM')

plt.title('Max decoding batch by input length')
plt.xlabel('input length')
plt.ylabel('batch size')
plt.legend()
plt.grid()
plt.savefig(f'{save_dir}/max_batch_by_len.pdf')
plt.savefig(f'{save_dir}/max_batch_by_len.jpg')
plt.show()
106 changes: 106 additions & 0 deletions experiments/plot_longbench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import matplotlib.pyplot as plt
import json
from argparse import ArgumentParser

color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# X positions of the plotted points; 1.0 is the uncompressed reference.
COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
# Rate tags embedded in the result keys ('<subtask>-<tag>_<exp id>').  There
# is no '1.0x' entry: the first point comes from the '<subtask>-full_...' key.
COMPRESSION_RATE_STRINGS = ['2.0x', '4.0x', '8.0x', '16.0x', '32.0x', '64.0x']
# Default --subsets selection: one subtask from each category below.
ONE_SUBTASK_PER_CATEGORY = ['narrativeqa', 'hotpotqa', 'gov_report', 'trec',
                            'passage_retrieval_en', 'lcc']
# Subtask name -> category, used by --by-category and --exclude-category.
SUBTASK_CATEGORIES = {
    "narrativeqa": "Single-Doc. QA",
    "qasper": "Single-Doc. QA",
    "multifieldqa_en": "Single-Doc. QA",
    "hotpotqa": "Multi-Doc. QA",
    "2wikimqa": "Multi-Doc. QA",
    "musique": "Multi-Doc. QA",
    "gov_report": "Summarization",
    "qmsum": "Summarization",
    "multi_news": "Summarization",
    "trec": "Few-shot Learning",
    "triviaqa": "Few-shot Learning",
    "samsum": "Few-shot Learning",
    "passage_count": "Synthetic",
    "passage_retrieval_en": "Synthetic",
    "lcc": "Code",
    "repobench-p": "Code",
}
# Subtask names listed one per line in datasets.txt (blank lines ignored).
# Read into a list inside a `with` block so the file is closed immediately and
# the names can be iterated more than once; a bare map() over the open file
# was a one-shot iterator and left the handle open.
with open('datasets.txt', 'r') as _datasets_file:
    ALL_SUBTASKS = [line.strip() for line in _datasets_file if line.strip()]

# Command-line interface.
#   --file / --exp-id            results of the compressed runs
#   --full-file / --full-exp-id  results of the uncompressed reference runs
#   --subsets                    subtask names, or the single keyword
#                                'one-per-category' / 'all'
#   --by-category                average subtasks within each category
#   --exclude-category           categories to leave out with --by-category
parser = ArgumentParser()
parser.add_argument("--file", type=str, default="result.json")
parser.add_argument("--full-file", type=str, default=None)
parser.add_argument("--exp-id", type=str, default="w32_L2_cc")
# NOTE(review): because this default is not None, the later
# `args.full_exp_id is None` fallback to --exp-id can never trigger; confirm
# whether the default was meant to be None.
parser.add_argument("--full-exp-id", type=str, default="w32_L2_cc")
parser.add_argument("--subsets", type=str, nargs="+",
                    default=ONE_SUBTASK_PER_CATEGORY)
parser.add_argument("--by-category", action="store_true")
parser.add_argument("--exclude-category", type=str, nargs="*", default=[])
parser.add_argument("--save-dir", type=str, default="./")
args = parser.parse_args()



# The uncompressed reference defaults to the same file as the compressed runs.
if args.full_file is None:
    args.full_file = args.file

# NOTE(review): --full-exp-id has a non-None parser default, so this fallback
# is currently never taken.
if args.full_exp_id is None:
    args.full_exp_id = args.exp_id

print(args.full_exp_id)

# Expand the two keyword forms of --subsets into explicit subtask lists.
if args.subsets == ['one-per-category']:
    args.subsets = ONE_SUBTASK_PER_CATEGORY
elif args.subsets == ['all']:
    # Materialise the names so they can be iterated more than once.
    args.subsets = list(ALL_SUBTASKS)

# Load both result files, closing each handle as soon as it has been parsed.
with open(args.file, 'r') as results_file:
    results = json.load(results_file)
with open(args.full_file, 'r') as full_results_file:
    full_results = json.load(full_results_file)

# Errors that mean "no usable result for this subtask": a missing key, a
# non-numeric entry, or a zero reference score.  Anything else propagates
# (the previous bare `except: pass` also hid real bugs and Ctrl-C).
_SKIPPABLE_ERRORS = (KeyError, TypeError, ZeroDivisionError)


def _relative_scores(dset):
    """Return the scores of *dset* as percentages of its uncompressed score.

    One value per entry of COMPRESSION_RATE_STRINGS, read from
    ``results['<dset>-<rate>_<exp id>']`` and divided by
    ``full_results['<dset>-full_<full exp id>']``.
    """
    full_score = full_results[f'{dset}-full_{args.full_exp_id}']
    return [results[f'{dset}-{cr}_{args.exp_id}'] / full_score * 100.
            for cr in COMPRESSION_RATE_STRINGS]


if args.by_category:
    # Sum the relative scores of the subtasks in each category, then plot the
    # per-category mean.  Subtasks without usable results are skipped and do
    # not count towards the mean.
    category_results = {}
    category_subtask_cnt = {}
    for dset in args.subsets:
        try:
            category = SUBTASK_CATEGORIES[dset]
            if category in args.exclude_category:
                continue
            dset_results = _relative_scores(dset)
        except _SKIPPABLE_ERRORS as err:
            print(f'skipping {dset}: {err!r}')
            continue
        running_totals = category_results.get(category, [0] * len(COMPRESSION_RATE_STRINGS))
        category_results[category] = [total + score
                                      for total, score in zip(running_totals, dset_results)]
        category_subtask_cnt[category] = category_subtask_cnt.get(category, 0) + 1
    for category, totals in category_results.items():
        # The first point (rate 1.0) is the 100% reference by definition.
        mean_scores = [100.0] + [total / category_subtask_cnt[category] for total in totals]
        plt.plot(COMPRESSION_RATES, mean_scores, label=category)
else:
    # One line per subtask, starting at the 100% reference.
    for dset in args.subsets:
        try:
            dset_results = [100.] + _relative_scores(dset)
        except _SKIPPABLE_ERRORS as err:
            print(f'skipping {dset}: {err!r}')
            continue
        plt.plot(COMPRESSION_RATES, dset_results, label=dset)

# Keep the automatically chosen lower y-limit but cap the upper one at 107.
ymin, ymax = plt.ylim()
if ymax > 107.0:
    ymax = 107.0
plt.ylim(ymin, ymax)

plt.title("LongBench subtask performance")
plt.xlabel("compression rate")
plt.ylabel("% performance")
plt.legend()
# Ticks are set after switching to the log scale so they are not replaced by
# the scale's default ticks.
plt.xscale('log')
plt.xticks(COMPRESSION_RATES, labels=COMPRESSION_RATES)
plt.grid()

# Write both raster and vector versions, then display the figure.
plt.savefig(f'{args.save_dir}/longbench_score_by_cr.jpg')
plt.savefig(f'{args.save_dir}/longbench_score_by_cr.pdf')
plt.show()
102 changes: 102 additions & 0 deletions experiments/plot_longbench_categories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import matplotlib.pyplot as plt
import json
from argparse import ArgumentParser

color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
# X positions of the plotted points; 1.0 is the uncompressed reference.
COMPRESSION_RATES = [1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0]
# Rate tags embedded in the result keys ('<subtask>-<tag>_<exp id>').  There
# is no '1.0x' entry: the first point comes from the '<subtask>-full_...' key.
COMPRESSION_RATE_STRINGS = ['2.0x', '4.0x', '8.0x', '16.0x', '32.0x', '64.0x']
ONE_SUBTASK_PER_CATEGORY = ['narrativeqa', 'hotpotqa', 'gov_report', 'trec',
                            'passage_retrieval_en', 'lcc']
# Subtask name -> category; decides which panel a subtask is drawn in.
SUBTASK_CATEGORIES = {
    "narrativeqa": "Single-Doc. QA",
    "qasper": "Single-Doc. QA",
    "multifieldqa_en": "Single-Doc. QA",
    "hotpotqa": "Multi-Doc. QA",
    "2wikimqa": "Multi-Doc. QA",
    "musique": "Multi-Doc. QA",
    "gov_report": "Summarization",
    "qmsum": "Summarization",
    "multi_news": "Summarization",
    "trec": "Few-shot Learning",
    "triviaqa": "Few-shot Learning",
    "samsum": "Few-shot Learning",
    "passage_count": "Synthetic",
    "passage_retrieval_en": "Synthetic",
    "lcc": "Code",
    "repobench-p": "Code",
}
# Subtask names listed one per line in datasets.txt (blank lines ignored).
# Read into a list inside a `with` block so the file is closed immediately and
# the names can be iterated more than once; a bare map() over the open file
# was a one-shot iterator and left the handle open.
with open('datasets.txt', 'r') as _datasets_file:
    ALL_SUBTASKS = [line.strip() for line in _datasets_file if line.strip()]
# Panel order of the 2x3 figure, filled row by row.
ALL_CATEGORIES = ["Single-Doc. QA", "Multi-Doc. QA", "Summarization",
                  "Few-shot Learning", "Synthetic", "Code"]

# Command-line interface.  --ylim takes two tokens (lower, upper); each is
# either a number or the word "none" (any case) meaning "no override".
parser = ArgumentParser()
for flag, options in (
    ("--file", dict(type=str, default="result.json")),
    ("--full-file", dict(type=str, default=None)),
    ("--exp-id", dict(type=str, default="w32_L2_cc")),
    ("--full-exp-id", dict(type=str, default="w32_L2_cc")),
    ("--save-dir", dict(type=str, default="./")),
    ("--ylim", dict(type=str, nargs=2, default=("None", "None"))),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()


def _parse_limit(token):
    """Turn one --ylim token into a float, or None for "none" (any case)."""
    if token.lower() == "none":
        return None
    return float(token)


set_ymin, set_ymax = (_parse_limit(token) for token in args.ylim)

# The uncompressed reference defaults to the same file as the compressed runs.
if args.full_file is None:
    args.full_file = args.file

# NOTE(review): --full-exp-id has a non-None parser default, so this fallback
# is currently never taken.
if args.full_exp_id is None:
    args.full_exp_id = args.exp_id

print(args.full_exp_id)

# Load both result files, closing each handle as soon as it has been parsed.
with open(args.file, 'r') as results_file:
    results = json.load(results_file)
with open(args.full_file, 'r') as full_results_file:
    full_results = json.load(full_results_file)

fig, ax = plt.subplots(2, 3, figsize=(12, 7))

# One panel per category, one line per subtask of that category.  Scores are
# plotted as a percentage of the subtask's uncompressed score, except in the
# "Code" panel when the --file path contains "70b": there the raw scores are
# plotted and the y-limits are left to matplotlib.
for i, category in enumerate(ALL_CATEGORIES):
    x, y = divmod(i, 3)  # row, column in the 2x3 grid
    ax_ = ax[x, y]
    absolute = category == "Code" and "70b" in args.file

    for dset, dset_cat in SUBTASK_CATEGORIES.items():
        if dset_cat != category:
            continue
        try:
            full_score = full_results[f'{dset}-full_{args.full_exp_id}']
            raw_scores = [results[f'{dset}-{cr}_{args.exp_id}']
                          for cr in COMPRESSION_RATE_STRINGS]
            if absolute:
                dset_results = [full_score] + raw_scores
            else:
                dset_results = [100.] + [score / full_score * 100.
                                         for score in raw_scores]
        except (KeyError, TypeError, ZeroDivisionError) as err:
            # Missing key, non-numeric entry or zero reference score: this
            # subtask has no usable result, so leave it out.  Other errors
            # propagate (the previous bare `except: pass` hid them all).
            print(f'skipping {dset}: {err!r}')
            continue
        ax_.plot(COMPRESSION_RATES, dset_results, label=dset)

    if absolute:
        ax_.set_ylabel("absolute performance")
    else:
        # Start from the automatic limits, cap the top at 107, then apply
        # whichever --ylim overrides were given.
        ymin, ymax = ax_.get_ylim()
        ymax = min(ymax, 107.0)
        if set_ymin is not None:
            ymin = set_ymin
        if set_ymax is not None:
            ymax = set_ymax
        ax_.set_ylim(ymin, ymax)
        if y == 0:
            # Only the left-most column carries the shared y-axis label.
            ax_.set_ylabel("% performance")
    ax_.legend(loc='lower left')
    ax_.set_title(category)
    ax_.set_xscale('log')
    ax_.set_xticks(COMPRESSION_RATES)
    ax_.set_xticklabels(COMPRESSION_RATES)
    if x == 1:
        # Only the bottom row carries the x-axis label.
        ax_.set_xlabel("compression rate")
    ax_.grid()


plt.savefig(f'{args.save_dir}/longbench_score_by_cr_all.jpg')
plt.savefig(f'{args.save_dir}/longbench_score_by_cr_all.pdf')
plt.show()
Loading

0 comments on commit 2a8eed3

Please sign in to comment.