Spaces:
Sleeping
Sleeping
File size: 3,994 Bytes
f42fe21 6c72532 f42fe21 6c72532 f42fe21 a732210 f42fe21 a732210 f42fe21 6c72532 f42fe21 6c72532 a54b660 6c72532 15c6064 f42fe21 15c6064 6c72532 15c6064 f42fe21 a54b660 15c6064 a54b660 f42fe21 6c72532 f42fe21 a732210 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
import json
from datetime import datetime
class ChatApp:
    """Streamlit chat front end for the scope-classification model.

    The conversation is kept in ``st.session_state.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts whose first entry is the system
    prompt. Each user turn is appended, the whole history is flattened into a
    single text prompt, and the model's reply is streamed back into the page.
    """

    def __init__(self):
        """Configure the page, seed the session state and load the model."""
        # NOTE(review): the page icon looks mis-encoded (it was probably an
        # emoji in the original file) -- confirm and restore.
        st.set_page_config(page_title="Inspection Engineer Chat", page_icon="π", layout="wide")
        self.initialize_session_state()
        self.model_handler = self.load_model()

    def initialize_session_state(self):
        """Create the message history with the system prompt if it is missing."""
        if "messages" not in st.session_state:
            st.session_state.messages = [
                {"role": "system", "content": "You are an experienced inspection methods engineer. Your task is to classify the following scope: analyze the scope provided in the input and determine the class item as an output."}
            ]

    @staticmethod
    @st.cache_resource
    def load_model():
        """Load the tokenizer and model and wrap them in a ``ModelHandler``.

        The function is decorated with ``st.cache_resource``. 8-bit loading is
        requested only when CUDA is available.

        Returns:
            A ``ModelHandler`` built from the loaded model and tokenizer.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        st.info(f"Using device: {device}")
        model_name = "amiguel/classItem-FT-llama-3-1-8b-instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            load_in_8bit=device == "cuda"
        )
        return ModelHandler(model, tokenizer)

    def display_message(self, role, content):
        """Render one chat message inside a chat bubble for ``role``."""
        with st.chat_message(role):
            st.write(content)

    def get_user_input(self):
        """Return the text submitted through the chat input box, if any."""
        return st.chat_input("Type your message here...")

    def stream_response(self, response):
        """Reveal ``response`` piece by piece to imitate typing.

        The text is split on single spaces only, so newlines and repeated
        spaces inside the response are preserved and the final text is exactly
        ``response`` (no flattened paragraphs, no trailing space).

        Args:
            response: Full text to display.

        Returns:
            The text that ended up on screen, identical to ``response``.
        """
        placeholder = st.empty()
        full_response = ""
        for index, piece in enumerate(response.split(" ")):
            if index:
                full_response += " "
            full_response += piece
            # The block character acts as a typing cursor while streaming.
            placeholder.markdown(full_response + "▌")
            time.sleep(0.01)  # Adjust between 0.01 and 0.05 for the desired speed
        # Final render without the cursor.
        placeholder.markdown(full_response)
        return full_response

    def save_chat_history(self):
        """Write the message history to a timestamped JSON file.

        The file is created in the current working directory and is named
        ``chat_history_<YYYYmmdd_HHMMSS>.json``.

        Returns:
            The name of the file that was written.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"chat_history_{timestamp}.json"
        # Explicit encoding so the file does not depend on the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(st.session_state.messages, f, indent=2)
        return filename

    def run(self):
        """Draw the page and process one user turn when input is submitted."""
        col1, col2 = st.columns([3, 1])

        with col1:
            st.title("Inspection Methods Engineer Assistant")

            # Skip the system prompt (index 0); it is not shown to the user.
            for message in st.session_state.messages[1:]:
                self.display_message(message["role"], message["content"])

            user_input = self.get_user_input()
            if user_input:
                self.display_message("user", user_input)
                st.session_state.messages.append({"role": "user", "content": user_input})

                # Flatten the whole history into "role: content" paragraphs
                # separated by blank lines; this is the prompt for the model.
                conversation = "\n\n".join(
                    f"{msg['role']}: {msg['content']}"
                    for msg in st.session_state.messages
                )

                with st.spinner("Analyzing and classifying scope..."):
                    response = self.model_handler.generate_response(conversation.strip())

                with st.chat_message("assistant"):
                    full_response = self.stream_response(response)

                st.session_state.messages.append({"role": "assistant", "content": full_response})

        with col2:
            st.sidebar.title("Chat Options")
            if st.sidebar.button("Save Chat History"):
                filename = self.save_chat_history()
                st.sidebar.success(f"Chat history saved to {filename}")
class ModelHandler:
    """Thin wrapper pairing a causal language model with its tokenizer."""

    def __init__(self, model, tokenizer):
        """Store the model and tokenizer used by ``generate_response``.

        Args:
            model: Object exposing ``device`` and ``generate(**inputs, ...)``.
            tokenizer: Callable that encodes text and exposes ``decode``.
        """
        self.model = model
        self.tokenizer = tokenizer

    def generate_response(self, conversation, max_new_tokens=100):
        """Generate the model's continuation of ``conversation``.

        Args:
            conversation: Prompt text to continue.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded newly generated text only, with special tokens removed.
        """
        inputs = self.tokenizer(conversation, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # A decoder-only model returns the prompt tokens followed by the new
        # ones; drop the prompt so the reply does not echo the conversation.
        prompt_length = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
# Script entry point: build the app and render the page.
if __name__ == "__main__":
    app = ChatApp()
    app.run()