nroggendorff committed on
Commit
295b4eb
·
verified ·
1 Parent(s): 0999dfb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -4,23 +4,24 @@ import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  from threading import Thread
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @spaces.GPU(duration=120)
8
  def predict(message, history):
9
- torch.set_default_device("cuda")
10
-
11
- tokenizer = AutoTokenizer.from_pretrained(
12
- "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
13
- trust_remote_code=True
14
- )
15
- model = AutoModelForCausalLM.from_pretrained(
16
- "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
17
- torch_dtype="auto",
18
- load_in_4bit=True,
19
- trust_remote_code=True
20
- )
21
  history_transformer_format = history + [[message, ""]]
22
-
23
- system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
24
  messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
25
  input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
26
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
5
  from threading import Thread
6
 
7
+ torch.set_default_device("cuda")
8
+ tokenizer = AutoTokenizer.from_pretrained(
9
+ "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
10
+ trust_remote_code=True
11
+ )
12
+ model = AutoModelForCausalLM.from_pretrained(
13
+ "cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
14
+ torch_dtype="auto",
15
+ load_in_4bit=True,
16
+ trust_remote_code=True
17
+ )
18
+
19
+ system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
20
+
21
+
22
  @spaces.GPU(duration=120)
23
  def predict(message, history):
 
 
 
 
 
 
 
 
 
 
 
 
24
  history_transformer_format = history + [[message, ""]]
 
 
25
  messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
26
  input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
27
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)