```bash
# Launch the text-generation-inference container serving the GPTQ-quantized
# WizardCoder-Python-34B model found under ./data.
#
#   -p 8080:80                  publish container port 80 on host port 8080
#   -e GPTQ_BITS / GPTQ_GROUPSIZE
#                               GPTQ parameters passed to the container
#                               (presumably needed when the checkpoint does not
#                               ship its own quantize config - confirm)
#   -e MAX_BEST_OF / MAX_BATCH_PREFILL_TOKENS
#                               server limits passed through the environment
#   --gpus '"device=0"'         expose only GPU 0 to the container
#   -v "$PWD/data:/data"        mount ./data at /data; quoted so a working
#                               directory containing spaces is not word-split
#
# Each trailing backslash must be the LAST character on its line: a backslash
# followed by a space is an escaped space, not a line continuation.
sudo docker run --rm \
  -p 8080:80 \
  -e GPTQ_BITS=4 \
  -e GPTQ_GROUPSIZE=128 \
  -e MAX_BEST_OF=1 \
  -e MAX_BATCH_PREFILL_TOKENS=2048 \
  --gpus '"device=0"' \
  -v "$PWD/data:/data" \
  ghcr.io/huggingface/text-generation-inference:sha-bce5e22 \
  --model-id /data/WizardCoder-Python-34B-V1.0-GPTQ \
  --quantize gptq
```