ask_to_pdf_app / app.py
grhaputra's picture
Update app.py
7754845 verified
raw
history blame
1.52 kB
import gradio as gr
import fitz
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import OpenAI
# Read the OpenAI API key from the environment; this is None when the
# OPENAI_API_KEY variable is not set.
openai_api_key = os.getenv("OPENAI_API_KEY")
def read_pdf(pdf_file, prompt):
    """Answer a question about an uploaded PDF.

    The PDF text is extracted with PyMuPDF, split into overlapping chunks,
    embedded with OpenAI and indexed in an in-memory FAISS store. The chunks
    most similar to ``prompt`` are then passed to a "stuff" question-answering
    chain together with the question.

    Args:
        pdf_file: The upload handed over by the Gradio ``"file"`` input.
            Either a filesystem path (``str`` / ``os.PathLike``) or an object
            that exposes the path as its ``.name`` attribute.
        prompt: The question to ask about the document.

    Returns:
        The chain's answer, or a string starting with ``"Error: "`` when no
        file was uploaded, the PDF contains no extractable text, or any
        exception is raised while processing. Exceptions are reported as text
        so the UI always has something to display.
    """
    if pdf_file is None:
        return "Error: no PDF file was uploaded."

    try:
        # Depending on the Gradio version the upload arrives either as the
        # path itself or as a temp-file wrapper whose ``.name`` is the path.
        # Path-like objects are checked first because ``Path.name`` is only
        # the basename, not the full path.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Open by path inside a context manager so the document handle is
        # released even if text extraction fails part-way through.
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text() for page in doc)

        # Split the text into overlapping chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        texts = text_splitter.split_text(text)

        # Scanned/image-only PDFs yield no text; building an index from an
        # empty list fails with an unhelpful error, so report it explicitly.
        if not texts:
            return "Error: no extractable text was found in the PDF."

        # Embed every chunk with OpenAI and index the vectors in FAISS.
        embeddings = OpenAIEmbeddings(api_key=openai_api_key)
        docsearch = FAISS.from_texts(texts, embeddings)

        # Retrieve the chunks closest to the question and let the LLM answer
        # from them.
        chain = load_qa_chain(OpenAI(api_key=openai_api_key), chain_type="stuff")
        docs = docsearch.similarity_search(prompt)
        answer = chain.run(input_documents=docs, question=prompt)
        return answer
    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return f"Error: {str(e)}"
# Build the web UI: a file upload plus a text box as inputs, wired to
# read_pdf, with the returned string shown as text output.
iface = gr.Interface(
    fn=read_pdf,
    inputs=["file", "text"],
    outputs="text",
    title="PDF Reader",
    description="Upload a PDF file!",
)

# Start the app; share=True additionally requests a public share link.
iface.launch(share=True)