phi-3-RAG / app.py
dmedhi's picture
chat application
6917098
raw
history blame
5.44 kB
import datetime
import os
import uuid
import fitz
import streamlit as st
from langchain_text_splitters import RecursiveCharacterTextSplitter
from llama_cpp import Llama
from datastore import ChromaStore
from embeddings import Embedding
#### state
# Streamlit re-executes this script on every interaction, so the per-session
# values are seeded only when they are not present yet.
if "chat_history" not in st.session_state:
    # Messages exchanged so far, as {"role": ..., "content": ...} dicts.
    st.session_state["chat_history"] = []
if "document_submitted" not in st.session_state:
    # Set to True once a document has been loaded into the vector store.
    st.session_state["document_submitted"] = False
@st.cache_resource(show_spinner=False)
def _load_phi3():
    """Load the quantized Phi-3 model once and return it with a guarding lock.

    Loading a GGUF file is expensive, so the instance is cached by Streamlit
    and reused across reruns instead of being rebuilt for every question.
    Because the cached instance is shared between sessions, a lock is returned
    alongside it so callers can serialize inference.

    Returns:
        A ``(llm, lock)`` tuple.
    """
    import threading  # local import keeps this block self-contained

    llm = Llama(
        model_path=os.path.join(
            os.getcwd(),
            "models",
            "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf",
        ),
        n_ctx=2000,
        n_threads=1,  # number of CPU threads to use
        n_gpu_layers=0,  # number of layers to offload to the GPU
    )
    return llm, threading.Lock()


def phi3(input: str, relevant_chunks: list):
    """Answer a question with the local Phi-3 model, grounded on retrieved text.

    Args:
        input: The user's question. (The name shadows the builtin but is kept
            because callers pass it by keyword.)
        relevant_chunks: Retrieved context; it is interpolated into the prompt
            using its ``str()`` form.

    Returns:
        The model's completion with surrounding whitespace removed.
    """
    llm, lock = _load_phi3()
    prompt = f"""CONTENT: {relevant_chunks}\n\nQUESTION: {input}\n\nFrom the given CONTENT, Please answer the QUESTION."""
    with lock:
        output = llm(
            f"<|user|>\n{prompt}<|end|>\n<|assistant|>",
            max_tokens=2000,
            stop=["<|end|>"],
            # Ask for the completion only. Echoing the prompt and splitting on
            # "<|assistant|>" breaks when the document or question contains
            # that marker itself.
            echo=False,
        )
    return output["choices"][0]["text"].strip()
def generate_unique_id():
    """Build an identifier from a random UUID4 and the current timestamp.

    Returns:
        A string shaped like ``<uuid4>-<YYYYMMDDHHMMSSffffff>``, where the
        timestamp is the current time down to microseconds.
    """
    random_part = uuid.uuid4()
    stamp = datetime.datetime.now()
    return f"{random_part}-{stamp:%Y%m%d%H%M%S%f}"
def add_to_vectorstore(content: str, chunk_size: int = 500, chunk_overlap: int = 20):
    """Replace the "pdf_store" collection with embeddings of ``content``.

    Any existing "pdf_store" collection is deleted first, then the text is
    split into chunks, each chunk is embedded and given a fresh id, and the
    result is added to a newly created collection.

    Args:
        content: Full text of the document to index.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Number of characters shared between adjacent chunks.
    """
    store = ChromaStore(collection_name="pdf_store")

    # Drop the previous document's data so only one document is searchable.
    if "pdf_store" in store.list_collections():
        store.delete("pdf_store")
        st.toast("Old database cleaned!")
    target = store.create()

    # Split the text into overlapping pieces.
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    ).split_text(content)

    # Embed each piece and mint its id in the same pass.
    encoded = [
        (Embedding.encode_text(piece).tolist(), generate_unique_id())
        for piece in pieces
    ]

    # Persist vectors, raw text and ids together.
    store.add(
        collection=target,
        embeddings=[vector for vector, _ in encoded],
        documents=pieces,
        ids=[identifier for _, identifier in encoded],
    )
def similarity_search(query: str):
    """Embed ``query`` and look it up in the "pdf_store" collection.

    Args:
        query: The user's question as plain text.

    Returns:
        Whatever ``ChromaStore.query`` returns for the query embedding.
    """
    store = ChromaStore(collection_name="pdf_store")
    target = store.create()
    vector = Embedding.encode_text(query).tolist()
    return store.query(collection=target, query_embedding=vector)
def _ingest_pdf(uploaded_file, chunk_size: int, chunk_overlap: int) -> None:
    """Extract the text of an uploaded PDF and index it in the vector store.

    Shows an error or warning instead of indexing when the settings are
    invalid or the PDF yields no text.
    """
    # The text splitter rejects an overlap larger than the chunk size, so
    # report it here instead of letting the app crash.
    if chunk_overlap > chunk_size:
        st.error("Chunk overlap must not be larger than chunk size.")
        return

    # The context manager closes the document even if extraction fails.
    with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
        text = "".join(page.get_text() for page in doc)

    # With no extractable text there is nothing to index.
    if not text.strip():
        st.warning("No extractable text found in the uploaded PDF.")
        return

    add_to_vectorstore(text, chunk_size, chunk_overlap)
    st.session_state.document_submitted = True
    st.toast("Document successfully added to vectorstore", icon="✅")


def main():
    """Render the page: a PDF upload panel on the left and the chat on the right.

    The left column collects a PDF and the chunking settings and indexes the
    document when "submit" is pressed. The right column shows the chat input
    once a document has been indexed, and lists the conversation newest first.
    """
    st.set_page_config(page_icon="🤖", page_title="Phi 3 RAG", layout="wide")
    st.markdown(
        """<h1 style="text-align:center;">Phi 3 RAG</h1>""", unsafe_allow_html=True
    )
    st.markdown(
        """<h3 style="text-align:center;">Conversational RAG application that utilizes local stack, <a href="https://huggingface.co/bartowski/Phi-3-medium-4k-instruct-GGUF">Phi-3 mini 4k instruct GGUF</a> and <a href="https://docs.trychroma.com/getting-started">ChromaDB</a></h3>""",
        unsafe_allow_html=True,
    )
    layout = st.columns(2)

    # document upload and chunking settings
    with layout[0]:
        with st.container(border=True, height=550):
            uploaded_file = st.file_uploader(
                label="Upload document to search",
                type="PDF",
                accept_multiple_files=False,
            )
            submit = st.button("submit")
            chunk_size = st.slider(
                label="Chunk_size", min_value=100, max_value=2000, step=100
            )
            chunk_overlap = st.slider(
                label="Chunk overlap", min_value=10, max_value=500, step=10
            )

            if uploaded_file is not None and submit:
                _ingest_pdf(uploaded_file, chunk_size, chunk_overlap)

    # chats
    with layout[1]:
        with st.container(border=True, height=550):
            if st.session_state.document_submitted:
                user_input = st.chat_input("Ask me!")
                if user_input is not None:
                    st.session_state.chat_history.append(
                        {"role": "user", "content": str(user_input)}
                    )
                    with st.spinner("Thinking..."):
                        # find on vector store
                        relevant_chunks = similarity_search(user_input)
                        response = phi3(
                            input=user_input, relevant_chunks=relevant_chunks
                        )
                    st.session_state.chat_history.append(
                        {"role": "assistant", "content": str(response)}
                    )

            # display messages, newest first
            for message in reversed(st.session_state.chat_history):
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()