Spaces:
Paused
Paused
nroggendorff
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -4,23 +4,24 @@ import torch
|
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
5 |
from threading import Thread
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
@spaces.GPU(duration=120)
|
8 |
def predict(message, history):
|
9 |
-
torch.set_default_device("cuda")
|
10 |
-
|
11 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
12 |
-
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
13 |
-
trust_remote_code=True
|
14 |
-
)
|
15 |
-
model = AutoModelForCausalLM.from_pretrained(
|
16 |
-
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
17 |
-
torch_dtype="auto",
|
18 |
-
load_in_4bit=True,
|
19 |
-
trust_remote_code=True
|
20 |
-
)
|
21 |
history_transformer_format = history + [[message, ""]]
|
22 |
-
|
23 |
-
system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
|
24 |
messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
|
25 |
input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
|
26 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|
|
|
4 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
5 |
from threading import Thread
|
6 |
|
7 |
+
torch.set_default_device("cuda")
|
8 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
9 |
+
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
10 |
+
trust_remote_code=True
|
11 |
+
)
|
12 |
+
model = AutoModelForCausalLM.from_pretrained(
|
13 |
+
"cognitivecomputations/dolphin-2.9.1-mixtral-1x22b",
|
14 |
+
torch_dtype="auto",
|
15 |
+
load_in_4bit=True,
|
16 |
+
trust_remote_code=True
|
17 |
+
)
|
18 |
+
|
19 |
+
system_prompt = "<|im_start|>system\nYou are Dolphin, a helpful AI assistant.<|im_end|>"
|
20 |
+
|
21 |
+
|
22 |
@spaces.GPU(duration=120)
|
23 |
def predict(message, history):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
history_transformer_format = history + [[message, ""]]
|
|
|
|
|
25 |
messages = system_prompt + "".join(["".join(["\n<|im_start|>user\n" + item[0], "<|im_end|>\n<|im_start|>assistant\n" + item[1]]) for item in history_transformer_format])
|
26 |
input_ids = tokenizer([messages], return_tensors="pt").to('cuda')
|
27 |
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
|