vilarin committed
Commit 781f439 · verified · 1 Parent(s): 85dc104

Update app.py

Files changed (1):
  1. app.py +2 -10
app.py CHANGED

@@ -2,7 +2,7 @@ import os
 import time
 import spaces
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
@@ -32,19 +32,12 @@ h3 {
 
 device = "cuda" # for GPU usage or "cpu" for CPU usage
 
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.bfloat16,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4")
-
 tokenizer = AutoTokenizer.from_pretrained(MODEL)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    trust_remote_code=True,
-    quantization_config=quantization_config)
+    trust_remote_code=True)
 
 @spaces.GPU()
 def stream_chat(
@@ -83,7 +76,6 @@ def stream_chat(
         top_k = top_k,
         temperature = temperature,
         repetition_penalty=penalty,
-        eos_token_id=tokenizer.eos_token_id,
         streamer=streamer,
     )
 
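For context: the removed block loaded the checkpoint through bitsandbytes 4-bit NF4 quantization; after this commit the weights load in plain bfloat16, which drops the bitsandbytes dependency but needs roughly 4x the GPU memory for the weights. A minimal sketch comparing the two load paths, assuming bitsandbytes is installed and with MODEL as a hypothetical stand-in for the Space's actual checkpoint id:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    MODEL = "org/model"  # hypothetical placeholder for the Space's checkpoint id

    # Plain bfloat16 load, as app.py does after this commit.
    bf16 = AutoModelForCausalLM.from_pretrained(
        MODEL, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
    print("bf16 GB:", bf16.get_memory_footprint() / 1e9)

    # The 4-bit NF4 load this commit removes (requires bitsandbytes).
    nf4 = AutoModelForCausalLM.from_pretrained(
        MODEL,
        device_map="auto",
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"))
    print("nf4 GB:", nf4.get_memory_footprint() / 1e9)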
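The dropped eos_token_id kwarg is likewise safe to omit in most cases: generate() falls back to the eos_token_id recorded in the model's generation_config, so passing the tokenizer's id explicitly is redundant as long as the two agree. A quick sketch of that check, reusing the model and tokenizer objects app.py already builds:

    # Both ids should match; generation then stops on the model's configured
    # EOS token even without the explicit kwarg.
    print(model.generation_config.eos_token_id, tokenizer.eos_token_id)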