Spaces:

optimum
/

llm-perf-leaderboard

Running

baptistecolle (HF staff) committed 25 days ago

Commit

371c49c

•

1 Parent(s): 3795233

fix(quant): add torchao to quantization option

Files changed (2) hide show

hardware.yaml CHANGED Viewed

@@ -32,6 +32,7 @@
     - awq
     - bnb
     - gptq
   backends:
     - pytorch

     - awq
     - bnb
     - gptq
+    - torchao
   backends:
     - pytorch

src/kernels.py CHANGED Viewed

@@ -38,6 +38,7 @@ def get_quant_df(llm_perf_df):
     exllamav2_df = copy_df[(copy_df["Quantization 🗜️"] == "GPTQ.4bit+ExllamaV2")]
     gemm_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMM")]
     gemv_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMV")]
     # merge the three dataframes
     exllamav1_df = pd.merge(
         vanilla_df,
@@ -63,8 +64,14 @@ def get_quant_df(llm_perf_df):
         on=["Model 🤗"],
         suffixes=["", " Custom Kernel"],
     )
     # concat the two dataframes row-wise
-    quant_df = pd.concat([exllamav1_df, exllamav2_df, gemm_df, gemv_df])
     # compute speedups
     quant_df["Prefill Speedup (%)"] = (
         (quant_df["Prefill (s)"] / quant_df["Prefill (s) Custom Kernel"]) * 100

     exllamav2_df = copy_df[(copy_df["Quantization 🗜️"] == "GPTQ.4bit+ExllamaV2")]
     gemm_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMM")]
     gemv_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMV")]
+    torchao_df = copy_df[(copy_df["Quantization 🗜️"] == "torchao.4bit")]
     # merge the three dataframes
     exllamav1_df = pd.merge(
         vanilla_df,
         on=["Model 🤗"],
         suffixes=["", " Custom Kernel"],
     )
+    torchao_df = pd.merge(
+        vanilla_df,
+        torchao_df,
+        on=["Model 🤗"],
+        suffixes=["", " Custom Kernel"],
+    )
     # concat the two dataframes row-wise
+    quant_df = pd.concat([exllamav1_df, exllamav2_df, gemm_df, gemv_df, torchao_df])
     # compute speedups
     quant_df["Prefill Speedup (%)"] = (
         (quant_df["Prefill (s)"] / quant_df["Prefill (s) Custom Kernel"]) * 100