import os
import logging

import PyPDF2
import streamlit as st
from dotenv import load_dotenv

from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import AnswerParser, BM25Retriever, PromptNode, PromptTemplate

# Load environment variables (e.g. HF_TOKEN) from a local .env file.
load_dotenv()

logging.basicConfig(level=logging.DEBUG)

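# Dependencies assumed from the imports above (package names are a best guess,
# not pinned by the source): farm-haystack 1.x, PyPDF2, streamlit, python-dotenv.
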
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at pdf_path."""
    text = ""
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page in pdf_reader.pages:
            # extract_text() may return None for pages without a text layer.
            text += page.extract_text() or ""
    return text

# Extract the raw text that will back the question-answering pipeline.
pdf_file_path = "Data/MR. MPROFY.pdf"
pdf_text = extract_text_from_pdf(pdf_file_path)
if not pdf_text:
    raise ValueError("No text extracted from PDF.")

# Wrap the extracted text in a Haystack Document so it can be indexed.
doc = Document(content=pdf_text, meta={"name": "MR. MPROFY"})

# Index the document in an in-memory store with BM25 enabled for sparse retrieval.
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([doc])

# The retriever returns the two highest-scoring documents for each query.
retriever = BM25Retriever(document_store=document_store, top_k=2)

# Prompt template for the LLM. Haystack fills {join(documents)} with the
# retrieved context and {query} with the user question; AnswerParser turns
# the raw completion into Answer objects.
qa_template = PromptTemplate(
    prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won't ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)}
Question: {query}
Answer:
""",
    output_parser=AnswerParser(),
)

# os.getenv is a function, not a mapping, so it must be called rather than indexed.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN is not set; add it to your environment or .env file.")

# PromptNode sends the filled-in template to Mixtral via the Hugging Face
# Inference API. max_length caps the length of the generated answer;
# model_max_length widens the limit used when truncating long prompts.
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,
    model_kwargs={"model_max_length": 5000},
)

# Retrieval-augmented generation pipeline: BM25 retrieval feeds the prompt node.
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])

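# Optional sanity check, left commented out so the script behaves as before.
# Running the pipeline directly (no Streamlit) returns a dict whose "answers"
# list holds the parsed Answer objects; the question below is only an example.
#
#     result = rag_pipeline.run(query="Who is MR. MPROFY?")
#     print(result["answers"][0].answer if result["answers"] else "No answer found.")
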
def run_streamlit_app():
    """Minimal Streamlit UI: a text input, a button, and the model's answer."""
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        response = rag_pipeline.run(query=query_text)
        answer = response["answers"][0].answer if response["answers"] else "No answer found."
        st.write(answer)

if __name__ == "__main__":
    run_streamlit_app()
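
# Serve the UI with Streamlit rather than plain Python, e.g.:
#     streamlit run app.py
# ("app.py" is a placeholder for whatever this file is named).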