# HTML <img> tag pointing at the logo file hosted in the leaderboard Space repository.
LOGO = '<img src="https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/logo.png">'

# Centered HTML page heading. The emoji were restored from mis-decoded UTF-8.
# NOTE(review): the second emoji is assumed to be the weight-lifter glyph; confirm against the upstream file.
TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""

# Markdown introduction: what the leaderboard measures and how the community
# can request new models or hardware/backend/optimization configurations.
INTRODUCTION = """
The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput, memory & energy) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.

Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:

- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the [🤗 LLM Performance Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) automatically.
- Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
"""

# HTML "About" section: a heading followed by a bullet list describing the
# benchmarking methodology (single GPU, score source, batch/prompt sizes,
# energy and memory measurement).
ABOUT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
<ul>
    <li>To avoid communication-dependent results, only one GPU is used.</li>
    <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
    <li>LLMs are running on a singleton batch with a prompt size of 256 and generating 256 tokens.</li>
    <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
    <li>We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two are reported by PyTorch and the last one is observed using PyNVML.</li>
</ul>
"""

# Markdown snippet showing two fenced YAML blocks: an example experiment
# configuration and the base configuration it inherits from.
# NOTE(review): the YAML indentation was lost and has been reconstructed from
# Hydra / Optimum-Benchmark conventions. In particular, the placement of
# `input_shapes` and `new_tokens` under `benchmark`, and of `hub_kwargs` at the
# top level, is presumed -- confirm against the Optimum-Benchmark config schema.
EXAMPLE_CONFIG = """
Here's an example of the configuration file used to benchmark the models with Optimum-Benchmark:
```yaml
defaults:
  - backend: pytorch
  - _base_ # inheriting from base config
  - _self_ # for hydra 1.1 compatibility

experiment_name: pytorch+cuda+float16+gptq-4bit+exllama-v1
device: cuda

backend:
  no_weights: true
  torch_dtype: float16
  quantization_scheme: gptq
  quantization_config:
    bits: 4
    use_cuda_fp16: false
    use_exllama: true
    exllama_config:
      version: 1
```

Where the base config is:
```yaml
defaults:
  - benchmark: inference # default benchmark
  - launcher: process # isolated process launcher
  - experiment # inheriting from experiment config
  - _self_ # for hydra 1.1 compatibility
  - override hydra/job_logging: colorlog # colorful logging
  - override hydra/hydra_logging: colorlog # colorful logging

hydra:
  run:
    dir: dataset/${oc.env:HOSTNAME}/${experiment_name}/${model}
  job:
    chdir: true
    env_set:
      COUNTRY_ISO_CODE: FRA
      OVERRIDE_BENCHMARKS: 0
      CUDA_VISIBLE_DEVICES: 0
      CUDA_DEVICE_ORDER: PCI_BUS_ID

backend:
  continuous_isolation: true

benchmark:
  duration: 10
  memory: true
  energy: true

  input_shapes:
    batch_size: 1
    sequence_length: 256

  new_tokens: 256

hub_kwargs:
  trust_remote_code: true
```
"""

# Label text accompanying the citation snippet below.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
# BibTeX snippet with two entries: the leaderboard itself and the
# Optimum-Benchmark software. Kept as a raw string so that `\url` is not
# interpreted as a Python escape sequence.
# Each entry is closed with its own `}` and authors are joined with " and ",
# which is the separator BibTeX requires between distinct author names.
CITATION_BUTTON = r"""@misc{llm-perf-leaderboard,
  author = {Ilyas Moutawwakil and Régis Pierrard},
  title = {LLM-Perf Leaderboard},
  year = {2023},
  publisher = {Hugging Face},
  howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
}
@software{optimum-benchmark,
  author = {Ilyas Moutawwakil and Régis Pierrard},
  publisher = {Hugging Face},
  title = {Optimum-Benchmark: A framework for benchmarking the performance of Transformers models with different hardwares, backends and optimizations.},
}
"""