baptistecolle HF staff committed on
Commit
371c49c
•
1 Parent(s): 3795233

fix(quant): add torchao to quantization option

Browse files
Files changed (2) hide show
  1. hardware.yaml +1 -0
  2. src/kernels.py +8 -1
hardware.yaml CHANGED
@@ -32,6 +32,7 @@
32
  - awq
33
  - bnb
34
  - gptq
 
35
  backends:
36
  - pytorch
37
 
 
32
  - awq
33
  - bnb
34
  - gptq
35
+ - torchao
36
  backends:
37
  - pytorch
38
 
src/kernels.py CHANGED
@@ -38,6 +38,7 @@ def get_quant_df(llm_perf_df):
38
  exllamav2_df = copy_df[(copy_df["Quantization 🗜️"] == "GPTQ.4bit+ExllamaV2")]
39
  gemm_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMM")]
40
  gemv_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMV")]
 
41
  # merge the three dataframes
42
  exllamav1_df = pd.merge(
43
  vanilla_df,
@@ -63,8 +64,14 @@ def get_quant_df(llm_perf_df):
63
  on=["Model 🤗"],
64
  suffixes=["", " Custom Kernel"],
65
  )
 
 
 
 
 
 
66
  # concat the two dataframes row-wise
67
- quant_df = pd.concat([exllamav1_df, exllamav2_df, gemm_df, gemv_df])
68
  # compute speedups
69
  quant_df["Prefill Speedup (%)"] = (
70
  (quant_df["Prefill (s)"] / quant_df["Prefill (s) Custom Kernel"]) * 100
 
38
  exllamav2_df = copy_df[(copy_df["Quantization 🗜️"] == "GPTQ.4bit+ExllamaV2")]
39
  gemm_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMM")]
40
  gemv_df = copy_df[(copy_df["Quantization 🗜️"] == "AWQ.4bit+GEMV")]
41
+ torchao_df = copy_df[(copy_df["Quantization 🗜️"] == "torchao.4bit")]
42
  # merge the three dataframes
43
  exllamav1_df = pd.merge(
44
  vanilla_df,
 
64
  on=["Model 🤗"],
65
  suffixes=["", " Custom Kernel"],
66
  )
67
+ torchao_df = pd.merge(
68
+ vanilla_df,
69
+ torchao_df,
70
+ on=["Model 🤗"],
71
+ suffixes=["", " Custom Kernel"],
72
+ )
73
  # concat the two dataframes row-wise
74
+ quant_df = pd.concat([exllamav1_df, exllamav2_df, gemm_df, gemv_df, torchao_df])
75
  # compute speedups
76
  quant_df["Prefill Speedup (%)"] = (
77
  (quant_df["Prefill (s)"] / quant_df["Prefill (s) Custom Kernel"]) * 100