Spaces:
Sleeping
Sleeping
File size: 1,517 Bytes
5ecf246 789a375 5ecf246 7754845 5ecf246 7754845 8368544 5ecf246 789a375 5ecf246 789a375 5ecf246 7754845 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
import fitz
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain_community.llms import OpenAI
# OpenAI credential taken from the environment; None when the variable is unset.
openai_api_key = os.getenv('OPENAI_API_KEY')
def read_pdf(pdf_file, prompt):
    """Answer a question about an uploaded PDF.

    The PDF text is extracted with PyMuPDF, split into overlapping chunks,
    embedded into a FAISS index, and the chunks most similar to ``prompt``
    are handed to a "stuff" question-answering chain.

    Args:
        pdf_file: The uploaded PDF, either a filesystem path or an object
            whose ``name`` attribute holds the path.
        prompt: The question to ask about the document.

    Returns:
        The value returned by the QA chain, or a string starting with
        ``"Error: "`` when the input is unusable or any step fails. This
        function never raises; failures are reported as text for the UI.
    """
    try:
        if pdf_file is None:
            return "Error: no PDF file was provided."
        if not prompt or not prompt.strip():
            return "Error: no question was provided."

        # Accept both a plain path and an upload wrapper exposing ``.name``.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        # Open by path and let the context manager close the document.
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text() for page in doc)

        # Split the text into overlapping chunks.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        texts = text_splitter.split_text(text)
        if not texts:
            # Building an index from zero chunks fails with an obscure error,
            # so report the real cause (e.g. a scanned, image-only PDF).
            return "Error: no extractable text was found in the PDF."

        # Embed the chunks with OpenAI and index them for similarity search.
        embeddings = OpenAIEmbeddings(api_key=openai_api_key)
        docsearch = FAISS.from_texts(texts, embeddings)
        chain = load_qa_chain(OpenAI(api_key=openai_api_key), chain_type="stuff")
        docs = docsearch.similarity_search(prompt)
        return chain.run(input_documents=docs, question=prompt)
    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"Error: {str(e)}"
# Gradio UI: a file upload and a text box as inputs, plain text as output,
# all routed through read_pdf.
iface = gr.Interface(
    read_pdf,
    inputs=["file", "text"],
    outputs="text",
    title="PDF Reader",
    description="Upload a PDF file!",
)
# Start the app; share=True additionally requests a public share link.
iface.launch(share=True)