# gptq_model/quant/__init__.py
# Uploaded by ssaroya ("Upload 7 files", commit 401522d); file size: 312 bytes.
from .quantizer import Quantizer
from .fused_attn import QuantLlamaAttention, make_quant_attn
from .fused_mlp import QuantLlamaMLP, make_fused_mlp, autotune_warmup_fused
from .quant_linear import QuantLinear, make_quant_linear, autotune_warmup_linear
from .triton_norm import TritonLlamaRMSNorm, make_quant_norm