m96tkmok committed on
Commit
37b51d7
·
verified ·
1 Parent(s): b15df2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -7,6 +7,8 @@ from langchain_ollama import ChatOllama
7
 
8
 
9
  from langchain_community.document_loaders import PyMuPDFLoader
 
 
10
  from langchain_text_splitters import RecursiveCharacterTextSplitter
11
 
12
  from langchain_ollama import OllamaEmbeddings
@@ -78,13 +80,23 @@ def main() -> None:
78
  st.write("The LLM model unsloth/Llama-3.2-3B-Instruct is used")
79
  st.write("You can upload a PDF to chat with !!!")
80
 
81
- with st.sidebar:
82
- st.title("PDF FILE UPLOAD:")
83
- docs = st.file_uploader("Upload your PDF File and Click on the Submit & Process Button", accept_multiple_files=False, key="pdf_uploader")
 
84
 
85
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
86
 
87
- chunks = text_splitter.split_documents(docs)
 
 
 
 
 
 
 
 
 
88
 
89
  embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url="http://localhost:11434")
90
 
 
7
 
8
 
9
  from langchain_community.document_loaders import PyMuPDFLoader
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter
13
 
14
  from langchain_ollama import OllamaEmbeddings
 
80
  st.write("The LLM model unsloth/Llama-3.2-3B-Instruct is used")
81
  st.write("You can upload a PDF to chat with !!!")
82
 
83
+ ## Ken 12/11/2024 Temp Comment out
84
+ #with st.sidebar:
85
+ # st.title("PDF FILE UPLOAD:")
86
+ # docs = st.file_uploader("Upload your PDF File and Click on the Submit & Process Button", accept_multiple_files=False, key="pdf_uploader")
87
 
88
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
89
 
90
+ ### Ken 12/11/2024 START
91
+
92
+ loader = PyPDFLoader(temp_file)
93
+ docs = loader.load()
94
+
95
+ raw_text = get_pdf(docs)
96
+ ### Ken 12/11/2024 END
97
+
98
+ #chunks = text_splitter.split_documents(docs)
99
+ chunks = text_splitter.split_documents(raw_text)
100
 
101
  embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url="http://localhost:11434")
102