Spaces:
Sleeping
Sleeping
import gradio as gr | |
import fitz | |
import os | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.vectorstores.faiss import FAISS | |
from langchain.chains.question_answering import load_qa_chain | |
from langchain_community.llms import OpenAI | |
# Read the OpenAI key from the environment so it is never hard-coded in the
# source; the value is None when OPENAI_API_KEY is not set.
openai_api_key = os.getenv("OPENAI_API_KEY")
def _extract_pdf_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    # Opening by path inside a ``with`` block guarantees the document (and
    # its underlying file handle) is closed even if extraction raises.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def read_pdf(pdf_file, prompt):
    """Answer *prompt* using the text content of an uploaded PDF.

    The PDF text is split into overlapping chunks, embedded with OpenAI
    embeddings into an in-memory FAISS index, and the chunks most similar
    to the prompt are passed to a "stuff" question-answering chain.

    Args:
        pdf_file: The uploaded file. Either an object exposing the file
            path as ``.name`` or a plain path string is accepted.
        prompt: The question to ask about the document.

    Returns:
        The chain's answer as a string, or a string starting with
        ``"Error: "`` describing what went wrong. Exceptions are never
        propagated to the caller because the result is shown directly in
        the UI's text output.
    """
    # Fail fast with readable messages instead of letting missing input
    # surface as an obscure AttributeError or a remote API error.
    if pdf_file is None:
        return "Error: no PDF file was provided."
    if not prompt or not prompt.strip():
        return "Error: no question was provided."

    # NOTE(review): the upload may arrive as a temp-file wrapper (with
    # ``.name``) or as a bare path string depending on the Gradio
    # version/configuration -- support both.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        text = _extract_pdf_text(pdf_path)

        # Split the text into overlapping chunks so that each chunk fits
        # comfortably in the model context while keeping some continuity.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        texts = text_splitter.split_text(text)
        if not texts:
            # E.g. scanned/image-only PDFs: building an index from an
            # empty list would fail with an unhelpful library error.
            return "Error: no extractable text was found in the PDF."

        # Embed the chunks via the OpenAI API and index them for
        # similarity search.
        embeddings = OpenAIEmbeddings(api_key=openai_api_key)
        docsearch = FAISS.from_texts(texts, embeddings)

        chain = load_qa_chain(
            OpenAI(api_key=openai_api_key), chain_type="stuff"
        )
        docs = docsearch.similarity_search(prompt)
        return chain.run(input_documents=docs, question=prompt)
    except Exception as e:
        # Top-level UI boundary: report the failure as text rather than
        # crashing the request.
        return f"Error: {str(e)}"
# Web UI: a file upload and a free-text question are passed to read_pdf,
# and the string it returns is rendered as plain text.
iface = gr.Interface(
    fn=read_pdf,
    title="PDF Reader",
    description="Upload a PDF file!",
    inputs=["file", "text"],
    outputs="text",
)

# Start the server; share=True additionally requests a public share link.
iface.launch(share=True)