Commit abdc7a2 • 1 Parent(s): 1895bb8
Update handler.py
Browse files
- handler.py +2 -1
handler.py
CHANGED
@@ -8,7 +8,8 @@ class EndpointHandler():
|
|
8 |
# Preload all the elements you are going to need at inference.
|
9 |
# pseudo:
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", use_fast=False)
|
11 |
-
self.model = AutoGPTQForCausalLM.from_quantized("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", device="cuda:0", use_triton=False, use_safetensors=True, trust_remote_code=True)
|
|
|
12 |
|
13 |
|
14 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
|
8 |
# Preload all the elements you are going to need at inference.
|
9 |
# pseudo:
|
10 |
self.tokenizer = AutoTokenizer.from_pretrained("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", use_fast=False)
|
11 |
+
self.model = AutoGPTQForCausalLM.from_quantized("philschmid/falcon-40b-instruct-GPTQ-inference-endpoints", device="cuda:0", use_triton=False, use_safetensors=True, torch_dtype=torch.float32, trust_remote_code=True)
|
12 |
+
|
13 |
|
14 |
|
15 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|