Daemontatox committed
Commit 9c46858 · verified · 1 Parent(s): 6b4d232

Update app.py

Files changed (1):
  1. app.py +5 -4
app.py CHANGED
@@ -68,9 +68,9 @@ h3 {
 def initialize_model():
     """Initialize the model with appropriate configurations"""
     quantization_config = BitsAndBytesConfig(
-        load_in_8bit=True,
-        bnb_8bit_compute_dtype=torch.bfloat16,
-        bnb_8bit_use_double_quant=True
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.bfloat16,
+        bnb_4bit_use_double_quant=True
     )
 
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -82,7 +82,8 @@ def initialize_model():
         torch_dtype=torch.float16,
         device_map="auto",
         attn_implementation="flash_attention_2",
-        quantization_config=quantization_config
+        quantization_config=quantization_config,
+        llm_int8_enable_fp32_cpu_offload=True
     )
 
     return model, tokenizer
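For context, a minimal sketch of initialize_model() as it reads after this commit. MODEL_ID is a placeholder here (the real value is defined elsewhere in app.py), and the standard transformers/bitsandbytes imports are assumed. One caveat: in current transformers releases, llm_int8_enable_fp32_cpu_offload is a BitsAndBytesConfig field rather than a from_pretrained() keyword, so the sketch sets it there instead of where the diff passes it; flash_attention_2 additionally requires the flash-attn package and a compatible GPU.

# Sketch only, not the exact file contents of app.py.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "org/model-name"  # placeholder for the repo's actual model id

def initialize_model():
    """Initialize the model with appropriate configurations"""
    # 4-bit quantization (replacing the earlier 8-bit config) with bf16 compute
    # and double quantization to reduce weight memory further.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        # In current transformers this flag lives on BitsAndBytesConfig; it allows
        # modules that do not fit on the GPU to stay on CPU in fp32.
        llm_int8_enable_fp32_cpu_offload=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="auto",
        attn_implementation="flash_attention_2",  # needs flash-attn installed
        quantization_config=quantization_config,
    )

    return model, tokenizer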