Spaces:
Running
on
Zero
Running
on
Zero
Daemontatox
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -99,10 +99,10 @@ h3 {
|
|
99 |
def initialize_model():
|
100 |
"""Initialize the model with appropriate configurations"""
|
101 |
quantization_config = BitsAndBytesConfig(
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
)
|
107 |
|
108 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
@@ -115,7 +115,7 @@ def initialize_model():
|
|
115 |
device_map="cuda",
|
116 |
attn_implementation="flash_attention_2",
|
117 |
trust_remote_code=True,
|
118 |
-
|
119 |
|
120 |
)
|
121 |
|
|
|
99 |
def initialize_model():
|
100 |
"""Initialize the model with appropriate configurations"""
|
101 |
quantization_config = BitsAndBytesConfig(
|
102 |
+
load_in_8bit=True,
|
103 |
+
bnb_8bit_compute_dtype=torch.bfloat16,
|
104 |
+
bnb_8bit_quant_type="nf4",
|
105 |
+
bnb_8bit_use_double_quant=True
|
106 |
)
|
107 |
|
108 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
|
|
|
115 |
device_map="cuda",
|
116 |
attn_implementation="flash_attention_2",
|
117 |
trust_remote_code=True,
|
118 |
+
quantization_config=quantization_config
|
119 |
|
120 |
)
|
121 |
|