From 1f2a354dc25103c7b5941df10a02b6222d813f8f Mon Sep 17 00:00:00 2001
From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com>
Date: Mon, 14 Oct 2024 14:35:11 +0200
Subject: [PATCH] remove old code linked to llm-perf leaderboard (#291)

---
 .../update_llm_perf_cpu_pytorch.yaml     |  52 -----
 .../update_llm_perf_cuda_pytorch.yaml    |  57 ------
 .../update_llm_perf_leaderboard.yaml     |  36 ----
 llm_perf/__init__.py                     |   0
 llm_perf/update_llm_perf_cpu_pytorch.py  | 147 --------------
 llm_perf/update_llm_perf_cuda_pytorch.py | 186 ------------------
 llm_perf/update_llm_perf_leaderboard.py  |  73 -------
 llm_perf/utils.py                        | 137 -------------
 8 files changed, 688 deletions(-)
 delete mode 100644 .github/workflows/update_llm_perf_cpu_pytorch.yaml
 delete mode 100644 .github/workflows/update_llm_perf_cuda_pytorch.yaml
 delete mode 100644 .github/workflows/update_llm_perf_leaderboard.yaml
 delete mode 100644 llm_perf/__init__.py
 delete mode 100644 llm_perf/update_llm_perf_cpu_pytorch.py
 delete mode 100644 llm_perf/update_llm_perf_cuda_pytorch.py
 delete mode 100644 llm_perf/update_llm_perf_leaderboard.py
 delete mode 100644 llm_perf/utils.py

diff --git a/.github/workflows/update_llm_perf_cpu_pytorch.yaml b/.github/workflows/update_llm_perf_cpu_pytorch.yaml
deleted file mode 100644
index cdec51c4..00000000
--- a/.github/workflows/update_llm_perf_cpu_pytorch.yaml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Update LLM Perf Benchmarks - Intel PyTorch
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 0 * * *"
-
-concurrency:
-  cancel-in-progress: true
-  group: ${{ github.workflow }}-${{ github.ref }}
-
-env:
-  IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-cpu
-
-jobs:
-  run_benchmarks:
-    strategy:
-      fail-fast: false
-      matrix:
-        subset: [unquantized]
-        machine:
-          [{ name: 32vCPU-C7i, runs-on: { group: "aws-c7i-8xlarge-plus" } }]
-
-    runs-on: ${{ matrix.machine.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Run benchmarks
-        uses: addnab/docker-run-action@v3
-        env:
-          SUBSET: ${{ matrix.subset }}
-          MACHINE: ${{ matrix.machine.name }}
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        with:
-          image: ${{ env.IMAGE }}
-          options: |
-            --rm
-            --shm-size 64G
-            --env SUBSET
-            --env MACHINE
-            --env HF_TOKEN
-            --env MKL_THREADING_LAYER=GNU
-            --env HF_HUB_ENABLE_HF_TRANSFER=1
-            --volume ${{ github.workspace }}:/workspace
-            --workdir /workspace
-          run: |
-            pip install packaging && pip install einops scipy optimum codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e .
-            python llm_perf/update_llm_perf_cpu_pytorch.py
diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
deleted file mode 100644
index 7c902b8c..00000000
--- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: Update LLM Perf Benchmarks - CUDA PyTorch
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 0 * * *"
-
-concurrency:
-  cancel-in-progress: true
-  group: ${{ github.workflow }}-${{ github.ref }}
-
-env:
-  IMAGE: ghcr.io/huggingface/optimum-benchmark:latest-cuda
-
-jobs:
-  run_benchmarks:
-    strategy:
-      fail-fast: false
-      matrix:
-        subset: [unquantized, bnb, awq, gptq]
-
-        machine:
-          [
-            { name: 1xA10, runs-on: { group: "aws-g5-4xlarge-plus" } },
-            { name: 1xT4, runs-on: { group: "aws-g4dn-2xlarge" } },
-          ]
-
-    runs-on: ${{ matrix.machine.runs-on }}
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Run benchmarks
-        uses: addnab/docker-run-action@v3
-        env:
-          SUBSET: ${{ matrix.subset }}
-          MACHINE: ${{ matrix.machine.name }}
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        with:
-          image: ${{ env.IMAGE }}
-          options: |
-            --rm
-            --gpus all
-            --shm-size 64G
-            --env SUBSET
-            --env MACHINE
-            --env HF_TOKEN
-            --env MKL_THREADING_LAYER=GNU
-            --env HF_HUB_ENABLE_HF_TRANSFER=1
-            --volume ${{ github.workspace }}:/workspace
-            --workdir /workspace
-          run: |
-            pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e .
-            python llm_perf/update_llm_perf_cuda_pytorch.py
diff --git a/.github/workflows/update_llm_perf_leaderboard.yaml b/.github/workflows/update_llm_perf_leaderboard.yaml
deleted file mode 100644
index 10ed80c9..00000000
--- a/.github/workflows/update_llm_perf_leaderboard.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-name: Update LLM Perf Leaderboard
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: "0 */6 * * *"
-
-concurrency:
-  cancel-in-progress: true
-  group: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  update_llm_perf_leaderboard:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v3
-        with:
-          python-version: "3.10"
-
-      - name: Install requirements
-        run: |
-          pip install --upgrade pip
-          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-          pip install pandas huggingface_hub[hf_transfer]
-          pip install .
-
-      - name: Update Open LLM Data
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          HF_HUB_ENABLE_HF_TRANSFER: 1
-        run: |
-          python llm_perf/update_llm_perf_leaderboard.py
diff --git a/llm_perf/__init__.py b/llm_perf/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/llm_perf/update_llm_perf_cpu_pytorch.py b/llm_perf/update_llm_perf_cpu_pytorch.py
deleted file mode 100644
index 25035550..00000000
--- a/llm_perf/update_llm_perf_cpu_pytorch.py
+++ /dev/null
@@ -1,147 +0,0 @@
-import os
-import traceback
-from itertools import product
-from logging import getLogger
-
-from llm_perf.utils import (
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST,
-    GENERATE_KWARGS,
-    INPUT_SHAPES,
-    OPEN_LLM_LIST,
-    PRETRAINED_OPEN_LLM_LIST,
-    is_benchmark_conducted,
-)
-from optimum_benchmark import (
-    Benchmark,
-    BenchmarkConfig,
-    BenchmarkReport,
-    InferenceConfig,
-    ProcessConfig,
-    PyTorchConfig,
-)
-from optimum_benchmark.logging_utils import setup_logging
-
-SUBSET = os.getenv("SUBSET", None)
-MACHINE = os.getenv("MACHINE", None)
-BACKEND = "pytorch"
-HARDWARE = "cpu"
-
-if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None:
-    PUSH_REPO_ID = f"optimum-benchmark/llm-perf-{BACKEND}-{HARDWARE}-debug"
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]  # noqa: F811
-    SUBSET = "unquantized"
-elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None:
-    PUSH_REPO_ID = f"optimum-benchmark/llm-perf-{BACKEND}-{HARDWARE}-{SUBSET}-{MACHINE}"
-else:
-    raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging")
-
-ATTENTION_CONFIGS = ["eager", "sdpa"]
-
-
-if SUBSET == "unquantized":
-    WEIGHTS_CONFIGS = {
-        # unquantized
-        "float32": {"torch_dtype": "float32", "quant_scheme": None, "quant_config": {}},
-        "float16": {"torch_dtype": "float16", "quant_scheme": None, "quant_config": {}},
-        "bfloat16": {"torch_dtype": "bfloat16", "quant_scheme": None, "quant_config": {}},
-    }
-else:
-    raise ValueError(f"Subset {SUBSET} not supported")
-
-
-LOGGER = getLogger("llm-perf-backend")
-LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
-LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
-LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")
-
-
-def is_benchmark_supported(weights_config, attn_implementation, hardware):
-    if attn_implementation == "flash_attention_2":
-        return False
-
-    return True
-
-
-def benchmark_cpu_pytorch(model, attn_implementation, weights_config):
-    benchmark_name = f"{weights_config}-{attn_implementation}-{BACKEND}"
-    subfolder = f"{benchmark_name}/{model.replace('/', '--')}"
-
-    torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
-    quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
-    quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]
-
-    if not is_benchmark_supported(weights_config, attn_implementation, HARDWARE):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
-        return
-
-    if is_benchmark_conducted(PUSH_REPO_ID, subfolder):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
-        return
-
-    launcher_config = ProcessConfig()
-    scenario_config = InferenceConfig(
-        memory=True,
-        energy=True,
-        latency=True,
-        duration=10,
-        iterations=10,
-        warmup_runs=10,
-        input_shapes=INPUT_SHAPES,
-        generate_kwargs=GENERATE_KWARGS,
-    )
-
-    backend_config = PyTorchConfig(
-        model=model,
-        device="cpu",
-        no_weights=True,
-        library="transformers",
-        task="text-generation",
-        torch_dtype=torch_dtype,
-        quantization_scheme=quant_scheme,
-        quantization_config=quant_config,
-        attn_implementation=attn_implementation,
-        model_kwargs={"trust_remote_code": True},
-    )
-
-    benchmark_config = BenchmarkConfig(
-        name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
-    )
-
-    benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-    try:
-        LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
-        benchmark_report = Benchmark.launch(benchmark_config)
-        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-    except Exception:
-        LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
-        benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})
-        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-
-if __name__ == "__main__":
-    # for isolated process
-    os.environ["LOG_TO_FILE"] = "0"
-    os.environ["LOG_LEVEL"] = "INFO"
-
-    # for main process
-    setup_logging(level="INFO", prefix="MAIN-PROCESS")
-
-    models_attentions_weights = list(
-        product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_CONFIGS, WEIGHTS_CONFIGS.keys())
-    )
-
-    LOGGER.info(
-        f"Running a total of {len(models_attentions_weights)} benchmarks, "
-        f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
-        f"{len(ATTENTION_CONFIGS)} attentions implementations "
-        f"and {len(WEIGHTS_CONFIGS)} weights configurations."
-    )
-
-    for model, attn_implementation, weights_config in models_attentions_weights:
-        benchmark_cpu_pytorch(model, attn_implementation, weights_config)
diff --git a/llm_perf/update_llm_perf_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py
deleted file mode 100644
index 98914f6a..00000000
--- a/llm_perf/update_llm_perf_cuda_pytorch.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import os
-import traceback
-from itertools import product
-from logging import getLogger
-
-from llm_perf.utils import (
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST,
-    GENERATE_KWARGS,
-    INPUT_SHAPES,
-    OPEN_LLM_LIST,
-    PRETRAINED_OPEN_LLM_LIST,
-    is_benchmark_conducted,
-)
-from optimum_benchmark import Benchmark, BenchmarkConfig, BenchmarkReport, InferenceConfig, ProcessConfig, PyTorchConfig
-from optimum_benchmark.logging_utils import setup_logging
-
-SUBSET = os.getenv("SUBSET", None)
-MACHINE = os.getenv("MACHINE", None)
-
-if os.getenv("MACHINE", None) is None and os.getenv("SUBSET", None) is None:
-    PUSH_REPO_ID = "optimum-benchmark/llm-perf-pytorch-cuda-debug"
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]  # noqa: F811
-    SUBSET = "unquantized"
-elif os.getenv("MACHINE", None) is not None and os.getenv("SUBSET", None) is not None:
-    PUSH_REPO_ID = f"optimum-benchmark/llm-perf-pytorch-cuda-{SUBSET}-{MACHINE}"
-else:
-    raise ValueError("Either both MACHINE and SUBSET should be set for benchmarking or neither for debugging")
-
-ATTENTION_CONFIGS = ["eager", "sdpa", "flash_attention_2"]
-if SUBSET == "unquantized":
-    WEIGHTS_CONFIGS = {
-        # unquantized
-        "float32": {"torch_dtype": "float32", "quant_scheme": None, "quant_config": {}},
-        "float16": {"torch_dtype": "float16", "quant_scheme": None, "quant_config": {}},
-        "bfloat16": {"torch_dtype": "bfloat16", "quant_scheme": None, "quant_config": {}},
-    }
-elif SUBSET == "bnb":
-    WEIGHTS_CONFIGS = {
-        # bnb
-        "4bit-bnb": {"torch_dtype": "float16", "quant_scheme": "bnb", "quant_config": {"load_in_4bit": True}},
-        "8bit-bnb": {"torch_dtype": "float16", "quant_scheme": "bnb", "quant_config": {"load_in_8bit": True}},
-    }
-elif SUBSET == "gptq":
-    WEIGHTS_CONFIGS = {
-        # gptq
-        "4bit-gptq-exllama-v1": {
-            "quant_scheme": "gptq",
-            "torch_dtype": "float16",
-            "quant_config": {"bits": 4, "use_exllama ": True, "version": 1, "model_seqlen": 256},
-        },
-        "4bit-gptq-exllama-v2": {
-            "torch_dtype": "float16",
-            "quant_scheme": "gptq",
-            "quant_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256},
-        },
-    }
-elif SUBSET == "awq":
-    WEIGHTS_CONFIGS = {
-        # awq
-        "4bit-awq-gemm": {
-            "torch_dtype": "float16",
-            "quant_scheme": "awq",
-            "quant_config": {"bits": 4, "version": "gemm"},
-        },
-        "4bit-awq-gemv": {
-            "torch_dtype": "float16",
-            "quant_scheme": "awq",
-            "quant_config": {"bits": 4, "version": "gemv"},
-        },
-        "4bit-awq-exllama-v1": {
-            "torch_dtype": "float16",
-            "quant_scheme": "awq",
-            "quant_config": {
-                "bits": 4,
-                "version": "exllama",
-                "exllama_config": {"version": 1, "max_input_len": 64, "max_batch_size": 1},
-            },
-        },
-        "4bit-awq-exllama-v2": {
-            "torch_dtype": "float16",
-            "quant_scheme": "awq",
-            "quant_config": {
-                "bits": 4,
-                "version": "exllama",
-                "exllama_config": {"version": 2, "max_input_len": 64, "max_batch_size": 1},
-            },
-        },
-    }
-
-
-LOGGER = getLogger("llm-perf-backend")
-LOGGER.info(f"len(OPEN_LLM_LIST): {len(OPEN_LLM_LIST)}")
-LOGGER.info(f"len(PRETRAINED_OPEN_LLM_LIST): {len(PRETRAINED_OPEN_LLM_LIST)}")
-LOGGER.info(f"len(CANONICAL_PRETRAINED_OPEN_LLM_LIST): {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)}")
-
-
-def is_benchmark_supported(weights_config, attn_implementation):
-    if attn_implementation == "flash_attention_2" and weights_config == "float32":
-        return False
-
-    return True
-
-
-def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
-    benchmark_name = f"{weights_config}-{attn_implementation}"
-    subfolder = f"{benchmark_name}/{model.replace('/', '--')}"
-
-    torch_dtype = WEIGHTS_CONFIGS[weights_config]["torch_dtype"]
-    quant_scheme = WEIGHTS_CONFIGS[weights_config]["quant_scheme"]
-    quant_config = WEIGHTS_CONFIGS[weights_config]["quant_config"]
-
-    if not is_benchmark_supported(weights_config, attn_implementation):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it is not supported")
-        return
-
-    if is_benchmark_conducted(PUSH_REPO_ID, subfolder):
-        LOGGER.info(f"Skipping benchmark {benchmark_name} with model {model} since it was already conducted")
-        return
-
-    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="kill")
-    scenario_config = InferenceConfig(
-        memory=True,
-        energy=True,
-        latency=True,
-        duration=10,
-        iterations=10,
-        warmup_runs=10,
-        input_shapes=INPUT_SHAPES,
-        generate_kwargs=GENERATE_KWARGS,
-    )
-    backend_config = PyTorchConfig(
-        model=model,
-        device="cuda",
-        device_ids="0",
-        no_weights=True,
-        library="transformers",
-        task="text-generation",
-        torch_dtype=torch_dtype,
-        quantization_scheme=quant_scheme,
-        quantization_config=quant_config,
-        attn_implementation=attn_implementation,
-        model_kwargs={"trust_remote_code": True},
-    )
-
-    benchmark_config = BenchmarkConfig(
-        name=benchmark_name, scenario=scenario_config, launcher=launcher_config, backend=backend_config
-    )
-
-    benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-    try:
-        LOGGER.info(f"Running benchmark {benchmark_name} with model {model}")
-        benchmark_report = Benchmark.launch(benchmark_config)
-        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-    except Exception:
-        LOGGER.error(f"Benchmark {benchmark_name} failed with model {model}")
-        benchmark_report = BenchmarkReport.from_dict({"traceback": traceback.format_exc()})
-        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-        benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
-        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=subfolder, private=True)
-
-
-if __name__ == "__main__":
-    # for isolated process
-    os.environ["LOG_TO_FILE"] = "0"
-    os.environ["LOG_LEVEL"] = "INFO"
-
-    # for main process
-    setup_logging(level="INFO", prefix="MAIN-PROCESS")
-
-    models_attentions_weights = list(
-        product(CANONICAL_PRETRAINED_OPEN_LLM_LIST, ATTENTION_CONFIGS, WEIGHTS_CONFIGS.keys())
-    )
-
-    LOGGER.info(
-        f"Running a total of {len(models_attentions_weights)} benchmarks, "
-        f"with {len(CANONICAL_PRETRAINED_OPEN_LLM_LIST)} models, "
-        f"{len(ATTENTION_CONFIGS)} attentions implementations "
-        f"and {len(WEIGHTS_CONFIGS)} weights configurations."
-    )
-
-    for model, attn_implementation, weights_config in models_attentions_weights:
-        benchmark_cuda_pytorch(model, attn_implementation, weights_config)
diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py
deleted file mode 100644
index 4516750a..00000000
--- a/llm_perf/update_llm_perf_leaderboard.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import subprocess
-from glob import glob
-
-import pandas as pd
-from huggingface_hub import create_repo, snapshot_download, upload_file
-from tqdm import tqdm
-
-from optimum_benchmark import Benchmark
-
-REPO_TYPE = "dataset"
-MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
-PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"
-
-PERF_DF = "perf-df-{subset}-{machine}.csv"
-LLM_DF = "llm-df.csv"
-
-
-def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
-    """
-    Gather the benchmarks for a given machine
-    """
-    perf_repo_id = PERF_REPO_ID.format(subset=subset, machine=machine, backend=backend, hardware=hardware)
-    snapshot = snapshot_download(repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"])
-
-    dfs = []
-    for file in tqdm(glob(f"{snapshot}/**/benchmark.json", recursive=True)):
-        dfs.append(Benchmark.from_json(file).to_dataframe())
-    benchmarks = pd.concat(dfs, ignore_index=True)
-
-    perf_df = PERF_DF.format(subset=subset, machine=machine)
-    benchmarks.to_csv(perf_df, index=False)
-    create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, private=False, exist_ok=True)
-    upload_file(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, path_in_repo=perf_df, path_or_fileobj=perf_df)
-
-
-def update_perf_dfs():
-    """
-    Update the performance dataframes for all machines
-    """
-    for machine in ["1xA10", "1xA100", "1xT4", "32vCPU-C7i"]:
-        for backend in ["pytorch"]:
-            for hardware in ["cuda", "cpu"]:
-                for subset in ["unquantized", "bnb", "awq", "gptq"]:
-                    try:
-                        gather_benchmarks(subset, machine, backend, hardware)
-                    except Exception:
-                        print(
-                            f"benchmark for subset: {subset}, machine: {machine}, backend: {backend}, hardware: {hardware} not found"
-                        )
-
-
-scrapping_script = """
-git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
-pip install -r scrape-open-llm-leaderboard/requirements.txt
-python scrape-open-llm-leaderboard/main.py
-rm -rf scrape-open-llm-leaderboard
-"""
-
-
-def update_llm_df():
-    """
-    Scrape the open-llm-leaderboard and update the leaderboard dataframe
-    """
-    subprocess.run(scrapping_script, shell=True)
-    create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
-    upload_file(
-        repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, path_in_repo=LLM_DF, path_or_fileobj="open-llm-leaderboard.csv"
-    )
-
-
-if __name__ == "__main__":
-    update_llm_df()
-    update_perf_dfs()
diff --git a/llm_perf/utils.py b/llm_perf/utils.py
deleted file mode 100644
index 6a558428..00000000
--- a/llm_perf/utils.py
+++ /dev/null
@@ -1,137 +0,0 @@
-import pandas as pd
-
-from optimum_benchmark.benchmark.report import BenchmarkReport
-
-INPUT_SHAPES = {"batch_size": 1, "sequence_length": 256}
-GENERATE_KWARGS = {"max_new_tokens": 64, "min_new_tokens": 64}
-
-
-OPEN_LLM_LEADERBOARD = pd.read_csv("hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv")
-OPEN_LLM_LIST = OPEN_LLM_LEADERBOARD.drop_duplicates(subset=["Model"])["Model"].tolist()
-PRETRAINED_OPEN_LLM_LIST = (
-    OPEN_LLM_LEADERBOARD[OPEN_LLM_LEADERBOARD["Type"] == "pretrained"]
-    .drop_duplicates(subset=["Model"])["Model"]
-    .tolist()
-)
-# CANONICAL_ORGANIZATIONS = [
-#     # big companies
-#     *["google", "facebook", "meta", "meta-llama", "microsoft", "Intel", "TencentARC", "Salesforce"],
-#     # collectives
-#     *["EleutherAI", "tiiuae", "NousResearch", "Open-Orca"],
-#     # HF related
-#     ["bigcode", "HuggingFaceH4", "huggyllama"],
-#     # community members
-#     ["teknium"],
-#     # startups
-#     *[
-#         "mistral-community",
-#         "openai-community",
-#         "togethercomputer",
-#         "stabilityai",
-#         "CohereForAI",
-#         "databricks",
-#         "mistralai",
-#         "internlm",
-#         "Upstage",
-#         "xai-org",
-#         "Phind",
-#         "01-ai",
-#         "Deci",
-#         "Qwen",
-#     ],
-# ]
-# CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
-#     model for model in PRETRAINED_OPEN_LLM_LIST if model.split("/")[0] in CANONICAL_ORGANIZATIONS
-# ]
-CANONICAL_PRETRAINED_OPEN_LLM_LIST = [
-    "01-ai/Yi-6B",
-    "01-ai/Yi-34B",
-    "Deci/DeciLM-7B",
-    "Deci/DeciCoder-1b",
-    "EleutherAI/gpt-j-6b",
-    "EleutherAI/gpt-neo-1.3B",
-    "EleutherAI/gpt-neo-125m",
-    "EleutherAI/gpt-neo-2.7B",
-    "EleutherAI/gpt-neox-20b",
-    "EleutherAI/polyglot-ko-12.8b",
-    "EleutherAI/pythia-1.3b",
-    "EleutherAI/pythia-1.4b",
-    "EleutherAI/pythia-12b",
-    "EleutherAI/pythia-160m",
-    "EleutherAI/pythia-2.7b",
-    "EleutherAI/pythia-410m",
-    "EleutherAI/pythia-6.7b",
-    "EleutherAI/pythia-70m",
-    "Qwen/Qwen-7B",
-    "Qwen/Qwen-14B",
-    "Qwen/Qwen-72B",
-    "Qwen/Qwen1.5-0.5B",
-    "Qwen/Qwen1.5-1.8B",
-    "Qwen/Qwen1.5-4B",
-    "Qwen/Qwen1.5-7B",
-    "Qwen/Qwen1.5-14B",
-    "Qwen/Qwen1.5-32B",
-    "Qwen/Qwen1.5-72B",
-    "Qwen/Qwen1.5-110B",
-    "Qwen/Qwen1.5-MoE-A2.7B",
-    "Qwen/Qwen2-beta-14B",
-    "Qwen/Qwen2-beta-72B",
-    "Salesforce/codegen-6B-nl",
-    "Salesforce/codegen-16B-nl",
-    "TencentARC/Mistral_Pro_8B_v0.1",
-    "databricks/dbrx-base",
-    "facebook/opt-125m",
-    "facebook/opt-350m",
-    "facebook/opt-2.7b",
-    "facebook/opt-6.7b",
-    "facebook/opt-13b",
-    "facebook/opt-30b",
-    "facebook/opt-66b",
-    "facebook/xglm-564M",
-    "facebook/xglm-4.5B",
-    "facebook/xglm-7.5B",
-    "google/gemma-2b",
-    "google/gemma-7b",
-    "google/recurrentgemma-2b",
-    "google/recurrentgemma-9b",
-    "internlm/internlm-20b",
-    "internlm/internlm2-20b",
-    "huggyllama/llama-7b",
-    "huggyllama/llama-13b",
-    "huggyllama/llama-30b",
-    "huggyllama/llama-65b",
-    "meta-llama/Llama-2-7b-hf",
-    "meta-llama/Llama-2-13b-hf",
-    "meta-llama/Llama-2-70b-hf",
-    "meta-llama/Meta-Llama-3-8B",
-    "meta-llama/Meta-Llama-3-70B",
-    "microsoft/phi-1_5",
-    "microsoft/rho-math-1b-v0.1",
-    "mistralai/Mistral-7B-v0.1",
-    "mistralai/Mixtral-8x7B-v0.1",
-    "mistralai/Mixtral-8x22B-v0.1",
-    "openai-community/gpt2",
-    "openai-community/gpt2-large",
-    "stabilityai/stablelm-3b-4e1t",
-    "stabilityai/stablelm-2-1_6b",
-    "stabilityai/stablelm-2-12b",
-    "stabilityai/stablelm-base-alpha-3b",
-    "stabilityai/stablelm-base-alpha-7b",
-    "tiiuae/falcon-rw-1b",
-    "tiiuae/falcon-7b",
-    "tiiuae/falcon-40b",
-    "tiiuae/falcon-180B",
-    "togethercomputer/RedPajama-INCITE-Base-3B-v1",
-    "togethercomputer/RedPajama-INCITE-Base-7B-v0.1",
-]
-
-
-def is_benchmark_conducted(push_repo_id, subfolder):
-    try:
-        report = BenchmarkReport.from_pretrained(repo_id=push_repo_id, subfolder=subfolder)
-        if "traceback" in report.to_dict():
-            return False
-        else:
-            return True
-    except Exception:
-        return False