m96tkmok committed
Commit bea58f2 · verified · 1 Parent(s): e9ae107

Create app.py


Create Llama-3.2 RAG PDF chatbot

Files changed (1)
  1. app.py +152 -0
app.py ADDED
import logging
import os
import tempfile
from typing import Any, Dict, List, Tuple

import faiss
import streamlit as st
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# pip install -qU langchain-ollama
# pip install langchain
# Also needed: langchain-community, langchain-text-splitters, faiss-cpu, pymupdf, streamlit

##### Logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


def format_docs(docs):
    """Join retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


@st.cache_resource(show_spinner=True)
def extract_model_names(
    models_info: Dict[str, List[Dict[str, Any]]],
) -> Tuple[str, ...]:
    """
    Extract model names from the provided models information.

    Args:
        models_info (Dict[str, List[Dict[str, Any]]]): Dictionary containing
            information about available models.

    Returns:
        Tuple[str, ...]: A tuple of model names.
    """
    logger.info("Extracting model names from models_info")
    model_names = tuple(model["name"] for model in models_info["models"])
    logger.info(f"Extracted model names: {model_names}")
    return model_names


def generate_response(rag_chain, input_text):
    """Run the RAG chain on the user's question and return the answer."""
    return rag_chain.invoke(input_text)


def main() -> None:
    st.title("🧠 This is a RAG Chatbot with Ollama and LangChain!")

    st.write("The LLM model unsloth/Llama-3.2-3B-Instruct is used.")
    st.write("You can upload a PDF to chat with!")

    with st.sidebar:
        st.title("PDF FILE UPLOAD:")
        uploaded_pdf = st.file_uploader(
            "Upload your PDF file and click on the Submit & Process button",
            accept_multiple_files=False,
            key="pdf_uploader",
        )

    if uploaded_pdf is None:
        st.info("Please upload a PDF file to get started.")
        st.stop()

    # Persist the uploaded file to disk so PyMuPDFLoader can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_pdf.getvalue())
        pdf_path = tmp.name

    docs = PyMuPDFLoader(pdf_path).load()
    os.unlink(pdf_path)

    # Split the PDF into overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents(docs)

    embeddings = OllamaEmbeddings(model="nomic-embed-text", base_url="http://localhost:11434")

    # Embed a sample query once to determine the embedding dimensionality for the index.
    single_vector = embeddings.embed_query("this is some text data")
    index = faiss.IndexFlatL2(len(single_vector))

    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )
    vector_store.add_documents(documents=chunks)

    ## Retrieval
    # MMR search; lambda_mult=1 weights pure relevance (no diversity penalty).
    retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 3, "fetch_k": 100, "lambda_mult": 1},
    )

    prompt_text = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know.
    Answer in bullet points. Make sure your answer is relevant to the question and is answered from the context only.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = ChatPromptTemplate.from_template(prompt_text)

    model = ChatOllama(model="unsloth/Llama-3.2-3B-Instruct")

    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    with st.form("llm-form"):
        text = st.text_area("Enter your question or statement:")
        submit = st.form_submit_button("Submit")

    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    if submit and text:
        with st.spinner("Generating response..."):
            response = generate_response(rag_chain, text)
            st.session_state["chat_history"].append({"user": text, "ollama": response})
            st.write(response)

    st.write("## Chat History")
    for chat in reversed(st.session_state["chat_history"]):
        st.write(f"**🧑 User**: {chat['user']}")
        st.write(f"**🧠 Assistant**: {chat['ollama']}")
        st.write("---")


if __name__ == "__main__":
    main()
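
To try this locally, a minimal setup sketch (assuming a local Ollama server on its default port 11434, and that the unsloth/Llama-3.2-3B-Instruct tag is available to your Ollama install — any chat model you have pulled can be swapped in):

    ollama pull nomic-embed-text
    pip install -U streamlit langchain langchain-ollama langchain-community langchain-text-splitters faiss-cpu pymupdf
    streamlit run app.py

The app expects both the nomic-embed-text embedding model and the chat model to be served by Ollama at http://localhost:11434, which matches the base_url hard-coded in the embeddings above.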