Spaces: Runtime error

vaishakgkumar committed
Commit 9781d5d · 1 Parent(s): e4c43f7
Update app.py

app.py CHANGED
@@ -1,76 +1,68 @@
-from transformers import AutoTokenizer,
-import torch
-import gradio as gr
-import random
-from textwrap import wrap
-from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
-from peft import PeftModel, PeftConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr
import os
import huggingface
from huggingface_hub import login

-hf_token = os.environ.get('HUGGINGFACE_TOKEN')
+# using hf token to login
+hf_token = os.environ.get('HUGGINGFACE_TOKEN')
login(hf_token)

# Define the device
device = "cuda" if torch.cuda.is_available() else "cpu"

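One fragile spot in the unchanged lines above: os.environ.get('HUGGINGFACE_TOKEN') returns None when the secret is not set, and login(None) falls back to an interactive prompt, which fails in a headless Space. A minimal guard, a sketch reusing the script's own names, avoids that failure mode:

    # Sketch: only call login() when the HUGGINGFACE_TOKEN secret is actually set
    hf_token = os.environ.get('HUGGINGFACE_TOKEN')
    if hf_token:
        login(hf_token)
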
-#
-
-
-
-
-# tokenizer = AutoTokenizer.from_pretrained("vaishakgkumar/stablemedv3", trust_remote_code=True, padding_side="left")
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = 'left'
-
-peft_config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1", token=hf_token)
-peft_model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", token=hf_token, trust_remote_code=True)
-peft_model = PeftModel.from_pretrained(peft_model, "vaishakgkumar/stablemedv1", token=hf_token)
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t')
+config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
+model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
+model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
+
+model.to(device)

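Review note: this commit drops from peft import PeftModel, PeftConfig from the import block, yet the new loading code still calls PeftConfig.from_pretrained and PeftModel.from_pretrained. As committed, app.py should raise NameError: name 'PeftConfig' is not defined at startup, which would explain the Space's "Runtime error" status. (import huggingface is also suspect; the deprecated huggingface package is usually not installed, and huggingface_hub is the import that actually matters here.) A sketch of the loading block with the import restored, assuming peft is listed in the Space's requirements:

    from peft import PeftConfig, PeftModel  # restored; this commit removed it

    # Load the base model, then apply the PEFT adapter on top of it
    tokenizer = AutoTokenizer.from_pretrained('stabilityai/stablelm-3b-4e1t')
    config = PeftConfig.from_pretrained("vaishakgkumar/stablemedv1")
    model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
    model = PeftModel.from_pretrained(model, "vaishakgkumar/stablemedv1")
    model.to(device)
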
class ChatBot:
    def __init__(self):
        self.history = []

-    def predict(self, user_input, system_prompt="You are an expert
-
-
-
-
-

-        # Concatenate the user input with chat history
-        if len(self.history) > 0:
-            chat_history_ids = torch.cat([self.history, user_input_ids], dim=-1)
-        else:
-            chat_history_ids = user_input_ids
-
-        # Generate a response using the PEFT model
-        response = peft_model.generate(input_ids=chat_history_ids, max_length=1200, pad_token_id=tokenizer.eos_token_id)
-
-        # Update chat history
-        self.history = chat_history_ids
-
-        # Decode and return the response
-        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
+    def predict(self, user_input, system_prompt="You are an expert analyst and provide assessment:"):
+        prompt = [{'role': 'user', 'content': user_input + "\n" + system_prompt + ":"}]
+        inputs = tokenizer.apply_chat_template(
+            prompt,
+            add_generation_prompt=True,
+            return_tensors='pt'
+        )
+
+        # Generate a response using the model
+        tokens = model.generate(
+            inputs.to(model.device),
+            max_new_tokens=250,
+            temperature=0.8,
+            do_sample=False
+        )
+
+        # Decode the response
+        response_text = tokenizer.decode(tokens[0], skip_special_tokens=False)
+
+        # Free up memory
+        del tokens
+        torch.cuda.empty_cache()

        return response_text

bot = ChatBot()

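A few behavioral details in the new predict are worth flagging: temperature=0.8 has no effect while do_sample=False, since greedy decoding ignores temperature; skip_special_tokens=False leaves markers such as <|endoftext|> in the returned text; decoding tokens[0] returns the prompt together with the completion, because generate outputs the full sequence; and self.history is never updated, so the bot is stateless despite the class holding a history field. Note also that apply_chat_template requires a chat template on the tokenizer, which a base-model tokenizer like stablelm-3b-4e1t's may not define; depending on the transformers version this falls back to a default template or raises. A sketch of the generate/decode steps with those issues addressed, reusing the script's names:

    # Sample so that temperature actually applies
    tokens = model.generate(
        inputs.to(model.device),
        max_new_tokens=250,
        do_sample=True,
        temperature=0.8,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated part, dropping prompt and special tokens
    response_text = tokenizer.decode(tokens[0][inputs.shape[-1]:], skip_special_tokens=True)
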
-
-title = "StableDoc Chat"
+title = "👋🏻Welcome to StableLM MED chat"
description = """
-You can use this Space to test out the current model vaishakgkumar/stablemedv3.
"""
+examples = [["What is the proper treatment for buccal herpes?", "Please provide information on the most effective antiviral medications and home remedies for treating buccal herpes."]]
+
iface = gr.Interface(
    fn=bot.predict,
    title=title,
    description=description,
-
+    examples=examples,
+    inputs=["text", "text"],
    outputs="text",
    theme="ParityError/Anime"
)
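On the interface wiring: inputs=["text", "text"] lines up with predict's two parameters (user_input and system_prompt), and the examples entry supplies one value for each field. What the file never does, in either version, is start the server; a Gradio Space's app.py normally ends with a launch call, sketched here:

    # Start the Gradio app so the Space actually serves the interface
    iface.launch()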