Daemontatox committed on
Commit
5a89ca0
·
verified ·
1 Parent(s): 80b15da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -99,10 +99,10 @@ h3 {
99
  def initialize_model():
100
  """Initialize the model with appropriate configurations"""
101
  quantization_config = BitsAndBytesConfig(
102
- load_in_4bit=True,
103
- bnb_4bit_compute_dtype=torch.bfloat16,
104
- bnb_4bit_quant_type="nf4",
105
- bnb_4bit_use_double_quant=True
106
  )
107
 
108
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
@@ -115,7 +115,7 @@ def initialize_model():
115
  device_map="cuda",
116
  attn_implementation="flash_attention_2",
117
  trust_remote_code=True,
118
- #quantization_config=quantization_config
119
 
120
  )
121
 
 
99
  def initialize_model():
100
  """Initialize the model with appropriate configurations"""
101
  quantization_config = BitsAndBytesConfig(
102
+ load_in_8bit=True,
103
+ bnb_8bit_compute_dtype=torch.bfloat16,
104
+ bnb_8bit_quant_type="nf4",
105
+ bnb_8bit_use_double_quant=True
106
  )
107
 
108
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID , trust_remote_code=True)
 
115
  device_map="cuda",
116
  attn_implementation="flash_attention_2",
117
  trust_remote_code=True,
118
+ quantization_config=quantization_config
119
 
120
  )
121