Spaces: Runtime error

vaishakgkumar committed
Commit 9781d5d · 1 Parent(s): e4c43f7
Update app.py

app.py CHANGED
@@ -1,76 +1,68 @@
-from transformers import AutoTokenizer,
-import torch
-import gradio as gr
-import random
-from textwrap import wrap
-from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
-from peft import PeftModel, PeftConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
import os
import huggingface
from huggingface_hub import login

-hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+# using hf token to login
+hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

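One fragile spot in the unchanged lines above: os.environ.get('HUGGINGFACE_TOKEN') returns None when the secret is not set, and login(None) falls back to an interactive prompt, which fails in a headless Space. A minimal guard, a sketch reusing the script's own names, avoids that failure mode:

    # Sketch: only call login() when the HUGGINGFACE_TOKEN secret is actually set
    hf_token = os.environ.get('HUGGINGFACE_TOKEN')
    if hf_token:
        login(hf_token)
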
-#
-
-
-
-
-# tokenizer = AutoTokenizer.from_pretrained("vaishakgkumar/stablemedv3", trust_remote_code=True, padding_side="left")
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = 'left'
-
-peft_config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1", token=hf_token)
-peft_model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
-peft_model = PeftModel.from_pretrained(peft_model, "vaishakgkumar/stablemedv1", token=hf_token)
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t')
+config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
+model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
+model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
+
+model.to(device)

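Review note: this commit drops from peft import PeftModel, PeftConfig from the import block, yet the new loading code still calls PeftConfig.from_pretrained and PeftModel.from_pretrained. As committed, app.py should raise NameError: name 'PeftConfig' is not defined at startup, which would explain the Space's "Runtime error" status. (import huggingface is also suspect; the deprecated huggingface package is usually not installed, and huggingface_hub is the import that actually matters here.) A sketch of the loading block with the import restored, assuming peft is listed in the Space's requirements:

    from peft import PeftConfig, PeftModel  # restored; this commit removed it

    # Load the base model, then apply the PEFT adapter on top of it
    tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t')
    config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
    model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
    model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
    model.to(device)
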
class ChatBot:
    def __init__(self):
        self.history = []

-    def predict(self, user_input, system_prompt="You are an expert
-
-
-
-
-

-        # Concatenate the user input with chat history
-        if len(self.history) > 0:
-            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
-        else:
-            chat_history_ids = user_input_ids
-
-        # Generate a response using the PEFT model
-        response = peft_model.generate(input_ids=chat_history_ids, max_length=1200, pad_token_id=tokenizer.eos_token_id)
-
-        # Update chat history
-        self.history = chat_history_ids
-
-        # Decode and return the response
-        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
+    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
+        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
+        inputs = tokenizer.apply_chat_template(
+            prompt,
+            add_generation_prompt=True,
+            return_tensors='pt'
+        )
+
+        # Generate a response using the model
+        tokens = model.generate(
+            inputs.to(model.device),
+            max_new_tokens=250,
+            temperature=0.8,
+            do_sample=False
+        )
+
+        # Decode the response
+        response_text = tokenizer.decode(tokens[0], skip_special_tokens=False)
+
+        # Free up memory
+        del tokens
+        torch.cuda.empty_cache()

        return response_text

bot = ChatBot()

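A few behavioral details in the new predict are worth flagging: temperature=0.8 has no effect while do_sample=False, since greedy decoding ignores temperature; skip_special_tokens=False leaves markers such as <|endoftext|> in the returned text; decoding tokens[0] returns the prompt together with the completion, because generate outputs the full sequence; and self.history is never updated, so the bot is stateless despite the class holding a history field. Note also that apply_chat_template requires a chat template on the tokenizer, which a base-model tokenizer like stablelm-3b-4e1t's may not define; depending on the transformers version this falls back to a default template or raises. A sketch of the generate/decode steps with those issues addressed, reusing the script's names:

    # Sample so that temperature actually applies
    tokens = model.generate(
        inputs.to(model.device),
        max_new_tokens=250,
        do_sample=True,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated part, dropping prompt and special tokens
    response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
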
-
-title = "StableDoc Chat"
+title = "👋🏻Welcome to StableLM MED chat"
description = """
-You can use this Space to test out the current model vaishakgkumar/stablemedv3.
"""
+examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]
+
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
-
+    examples=examples,
+    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)
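On the interface wiring: inputs=["text", "text"] lines up with predict's two parameters (user_input and system_prompt), and the examples entry supplies one value for each field. What the file never does, in either version, is start the server; a Gradio Space's app.py normally ends with a launch call, sketched here:

    # Start the Gradio app so the Space actually serves the interface
    iface.launch()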