update reporting tool to target a memory type
actions-user committed Oct 31, 2023
1 parent f2b3f8a commit 96ae22a
Showing 6 changed files with 187 additions and 166 deletions.
7 changes: 6 additions & 1 deletion examples/running-llamas/README.md
@@ -26,11 +26,16 @@ This will create a folder called `experiments` with the results of the benchmark
To create a report, run:

```bash
python report.py -e experiments
python report.py -e experiments -m allocated
```

This will create some quick reporting artifacts: a `full_report.csv`, a `short_report.csv`, some plots, and a `rich_table.svg`.

`-e` is the experiments folder from which the results are read.
`-r` is the report folder to which the resulting artifacts are written.
`-m` is the memory type to use for the reporting; one of `used`, `allocated` or `reserved`.
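
For example, a hypothetical invocation that writes the artifacts to a folder named `my_report` and reports reserved memory instead:

```bash
python report.py -e experiments -r my_report -m reserved
```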


## Results

### On A100-80GB
(2 changed files could not be displayed.)
178 changes: 89 additions & 89 deletions examples/running-llamas/artifacts/A100-80GB/rich_table.svg
(File could not be displayed.)
22 changes: 11 additions & 11 deletions examples/running-llamas/artifacts/A100-80GB/short_report.csv
@@ -1,11 +1,11 @@
experiment_name,GPU,Batch Size,Forward Latency (s),Forward Throughput (samples/s),Forward Max Memory Used (MB),Forward Max Memory Allocated (MB),Forward Max Memory Reserved (MB),Generate Throughput (tokens/s),Generate Max Memory Used (MB),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),Quantization Scheme
fp16-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.402,39.8,19165,16520,17779,471.0,27988,26442,84511,fp16
fp16-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.204,39.2,17087,15037,15701,290.0,64889,19997,63503,fp16
gptq-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.415,38.6,10900,7080,8604,333.0,65676,17002,83596,GPTQ
fp16-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.107,37.4,16022,14295,14636,147.0,26346,16774,24960,fp16
gptq-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.223,35.9,8826,5597,6530,206.0,56629,10557,54333,GPTQ
fp16-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0579,34.5,15392,13924,14006,75.3,17003,15162,15617,fp16
gptq-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.122,32.8,7761,4855,5465,134.0,18085,7335,15789,GPTQ
fp16-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0328,30.5,15153,13738,13767,37.9,15866,14356,14480,fp16
gptq-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0706,28.3,6872,4484,4575,66.5,8822,5722,6526,GPTQ
gptq-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0458,21.8,6746,4298,4450,34.6,7606,4916,5309,GPTQ
experiment_name,GPU,Batch Size,Forward Latency (s),Forward Throughput (samples/s),Forward Max Memory Used (MB),Forward Max Memory Allocated (MB),Forward Max Memory Reserved (MB),Generate Throughput (tokens/s),Generate Max Memory Used (MB),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),Quantization Scheme,Group
fp16-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.402,39.8,19165,16520,17779,471.0,27988,26442,84511,fp16,A100-fp16
fp16-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.204,39.2,17087,15037,15701,290.0,64889,19997,63503,fp16,A100-fp16
gptq-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.415,38.6,10900,7080,8604,333.0,65676,17002,83596,GPTQ,A100-GPTQ
fp16-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.107,37.4,16022,14295,14636,147.0,26346,16774,24960,fp16,A100-fp16
gptq-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.223,35.9,8826,5597,6530,206.0,56629,10557,54333,GPTQ,A100-GPTQ
fp16-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0579,34.5,15392,13924,14006,75.3,17003,15162,15617,fp16,A100-fp16
gptq-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.122,32.8,7761,4855,5465,134.0,18085,7335,15789,GPTQ,A100-GPTQ
fp16-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0328,30.5,15153,13738,13767,37.9,15866,14356,14480,fp16,A100-fp16
gptq-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0706,28.3,6872,4484,4575,66.5,8822,5722,6526,GPTQ,A100-GPTQ
gptq-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0458,21.8,6746,4298,4450,34.6,7606,4916,5309,GPTQ,A100-GPTQ
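
The only substantive change to this file is the new trailing `Group` column. A minimal sketch of how it is derived, mirroring `get_short_report` in the `report.py` diff below:

```python
import pandas as pd

# sketch: the Group column concatenates the GPU name and the quantization scheme
short_report = pd.DataFrame({"GPU": ["A100"], "Quantization Scheme": ["fp16"]})
short_report["Group"] = short_report["GPU"] + "-" + short_report["Quantization Scheme"]
print(short_report["Group"].iloc[0])  # A100-fp16
```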
146 changes: 81 additions & 65 deletions examples/running-llamas/report.py
@@ -11,7 +11,7 @@
from rich.terminal_theme import MONOKAI


def gather_inference_report(root_folder: Path) -> DataFrame:
def gather_full_report(root_folder: Path, report_folder: str = "artifacts") -> DataFrame:
# key is path to inference file as string, value is dataframe
inference_dfs = {
f.parent.absolute().as_posix(): pd.read_csv(f) for f in root_folder.glob("**/inference_results.csv")
@@ -37,6 +37,10 @@ def gather_inference_report(root_folder: Path) -> DataFrame:
# Concatenate all reports
inference_report = pd.concat(inference_reports, axis=0, ignore_index=True)
inference_report.set_index("experiment_name", inplace=True)

inference_report.sort_values(by="forward.throughput(samples/s)", ascending=False, inplace=True)
inference_report.to_csv(f"{report_folder}/full_report.csv")

return inference_report


@@ -77,7 +81,7 @@ def format_row(row, style=""):
return formated_row


def get_short_report(inference_report):
def get_short_report(full_report, report_folder: str = "artifacts"):
short_columns = {
"environment.gpus": "GPU",
"benchmark.input_shapes.batch_size": "Batch Size",
@@ -91,8 +95,8 @@ def get_short_report(inference_report):
"generate.max_memory_allocated(MB)": "Generate Max Memory Allocated (MB)",
"generate.max_memory_reserved(MB)": "Generate Max Memory Reserved (MB)",
}
short_report = inference_report[list(short_columns.keys())].rename(columns=short_columns)
short_report["Quantization Scheme"] = inference_report.index.str.split("-").str[0]
short_report = full_report[list(short_columns.keys())].rename(columns=short_columns)
short_report["Quantization Scheme"] = full_report.index.str.split("-").str[0]
short_report["Quantization Scheme"].fillna("unquantized", inplace=True)
short_report["Quantization Scheme"].replace("bnb", "BnB", inplace=True)
short_report["Quantization Scheme"].replace("gptq", "GPTQ", inplace=True)
@@ -103,10 +107,12 @@ def get_short_report(inference_report):

short_report["Group"] = short_report["GPU"] + "-" + short_report["Quantization Scheme"]

short_report.to_csv(f"{report_folder}/short_report.csv")

return short_report


def get_rich_table(short_report):
def get_rich_table(short_report, report_folder: str = "artifacts"):
# create rich table
rich_table = Table(show_header=True, show_lines=True)
# we add a column for the index
@@ -118,10 +124,14 @@ def get_rich_table(short_report):
for index, row in short_report.iterrows():
rich_table.add_row(index, *format_row(row.values, style=""))

console = Console(record=True)
console.print(rich_table, justify="center")
console.save_svg(f"{report_folder}/rich_table.svg", theme=MONOKAI, title="Inference Report")

return rich_table


def get_throughput_plot(short_report):
def get_plots(short_report, memory: str = "allocated", report_folder: str = "artifacts"):
# for each quantization scheme we plot the throughput vs batch size
fig1, ax1 = plt.subplots()
fig2, ax2 = plt.subplots()
@@ -166,42 +176,45 @@
label=group,
marker="o",
)
ax3.plot(
forward_memory["Batch Size"],
forward_memory["Forward Max Memory Used (MB)"],
label=group + "-used",
marker="^",
)
ax3.plot(
forward_pytorch_max_memory_reserved["Batch Size"],
forward_pytorch_max_memory_reserved["Forward Max Memory Reserved (MB)"],
label=group + "-reserved",
marker=".",
)
ax3.plot(
forward_pytorch_max_memory_allocated["Batch Size"],
forward_pytorch_max_memory_allocated["Forward Max Memory Allocated (MB)"],
label=group + "-allocated",
marker="v",
)
ax4.plot(
generate_memory["Batch Size"],
generate_memory["Generate Max Memory Used (MB)"],
label=group + "-used",
marker="^",
)
ax4.plot(
generate_pytorch_max_memory_reserved["Batch Size"],
generate_pytorch_max_memory_reserved["Generate Max Memory Reserved (MB)"],
label=group + "-reserved",
marker=".",
)
ax4.plot(
generate_pytorch_max_memory_allocated["Batch Size"],
generate_pytorch_max_memory_allocated["Generate Max Memory Allocated (MB)"],
label=group + "-allocated",
marker="v",
)
if "used" in memory:
ax3.plot(
forward_memory["Batch Size"],
forward_memory["Forward Max Memory Used (MB)"],
label=group + "-used",
marker="^",
)
ax4.plot(
generate_memory["Batch Size"],
generate_memory["Generate Max Memory Used (MB)"],
label=group + "-used",
marker="^",
)
elif "reserved" in memory:
ax3.plot(
forward_pytorch_max_memory_reserved["Batch Size"],
forward_pytorch_max_memory_reserved["Forward Max Memory Reserved (MB)"],
label=group + "-reserved",
marker=".",
)
ax4.plot(
generate_pytorch_max_memory_reserved["Batch Size"],
generate_pytorch_max_memory_reserved["Generate Max Memory Reserved (MB)"],
label=group + "-reserved",
marker=".",
)
elif "allocated" in memory:
ax3.plot(
forward_pytorch_max_memory_allocated["Batch Size"],
forward_pytorch_max_memory_allocated["Forward Max Memory Allocated (MB)"],
label=group + "-allocated",
marker="*",
)
ax4.plot(
generate_pytorch_max_memory_allocated["Batch Size"],
generate_pytorch_max_memory_allocated["Generate Max Memory Allocated (MB)"],
label=group + "-allocated",
marker="*",
)

ax1.set_xlabel("Batch Size")
ax1.set_ylabel("Forward Latency (s)")
@@ -224,6 +237,11 @@
ax3.legend(fancybox=True, shadow=True)
ax4.legend(fancybox=True, shadow=True)

fig1.savefig(f"{report_folder}/forward_latency_plot.png")
fig2.savefig(f"{report_folder}/generate_throughput_plot.png")
fig3.savefig(f"{report_folder}/forward_memory_plot.png")
fig4.savefig(f"{report_folder}/generate_memory_plot.png")

return fig1, fig2, fig3, fig4


@@ -236,43 +254,41 @@ def generate_report():
required=True,
help="The folder containing the results of experiments.",
)
parser.add_argument(
"--memory",
"-m",
nargs="*",
type=str,
required=True,
help="choose memory metric",
choices=["used", "reserved", "allocated"],
default="allocated",
)
parser.add_argument(
"--report-name",
"-r",
type=str,
required=False,
default="artifacts",
help="The name of the report.",
)

args = parser.parse_args()
report_folder = args.report_name
experiments_folders = args.experiments

if args.report_name:
report_folder = f"artifacts/{args.report_name}"
else:
report_folder = "artifacts"
Path(report_folder).mkdir(parents=True, exist_ok=True)

# gather experiments results
inference_report = gather_inference_report(experiments_folders)
inference_report.sort_values(by="forward.throughput(samples/s)", ascending=False, inplace=True)
inference_report.to_csv(f"{report_folder}/full_report.csv")

short_report = get_short_report(inference_report)
short_report.to_csv(f"{report_folder}/short_report.csv")

forward_throughput_plot, generate_throughput_plot, forward_memory_plot, generate_memory_plot = get_throughput_plot(
short_report
)
forward_throughput_plot.savefig(f"{report_folder}/forward_latency_plot.png")
generate_throughput_plot.savefig(f"{report_folder}/generate_throughput_plot.png")
forward_memory_plot.savefig(f"{report_folder}/forward_memory_plot.png")
generate_memory_plot.savefig(f"{report_folder}/generate_memory_plot.png")
if len(args.memory) == 0:
memory = ["used", "reserved", "allocated"]
else:
memory = args.memory

rich_table = get_rich_table(short_report)
console = Console(record=True)
console.print(rich_table, justify="center")
console.save_svg(f"{report_folder}/rich_table.svg", theme=MONOKAI, title="Inference Report")
# gather experiments results
full_report = gather_full_report(experiments_folders, report_folder=report_folder)
short_report = get_short_report(full_report, report_folder=report_folder)
figs = get_plots(short_report, memory=memory, report_folder=report_folder)
rich_table = get_rich_table(short_report, report_folder=report_folder)


if __name__ == "__main__":
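A note on the new `--memory` flag: it is declared with `nargs="*"`, so it must be present but may be given zero or more values, and an empty value list falls back to all three metrics. Hypothetical invocations, assuming the argparse setup above:

```bash
# plot only PyTorch-allocated memory
python report.py -e experiments -m allocated

# pass the flag with no values to fall back to all three memory metrics
python report.py -e experiments -m
```

Note that `get_plots` selects the metric with an `if`/`elif` chain, so when several types are passed, only the first match (`used`, then `reserved`, then `allocated`) is actually plotted.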
