Spaces:

Ahaduzzaman
/

chatpdf

Running

App Files Files Community

chatpdf / api.py

Ahaduzzaman

Upload 3 files

35d610e over 1 year ago

raw

history blame contribute delete

3.61 kB

	from flask import Flask, request, jsonify
	import os
	from PyPDF2 import PdfReader
	import docx
	from langchain.chat_models import ChatOpenAI
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.embeddings.huggingface import HuggingFaceEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory

	app = Flask(__name__)

	# SECURITY: API keys must never be committed to source control. The key is
	# read from the OPENAI_API_KEY environment variable (e.g. a deployment
	# secret). Any key that was previously hard-coded in this file should be
	# treated as compromised and revoked.
	# The value is None when the variable is unset.
	openai_api_key = os.environ.get("OPENAI_API_KEY")

	# Paths of the local documents to index; replace with your own files.
	file_paths = ["cv.pdf"]

	# Conversation chain shared by all requests. It stays None until
	# initialize_conversation_chain() builds it.
	conversation_chain = None

	def read_files(file_paths):
	    """Concatenate the text extracted from every file in *file_paths*.

	    Each file is dispatched on its extension, compared case-insensitively
	    so that ``CV.PDF`` is treated like ``cv.pdf``: ``.pdf`` goes to
	    ``get_pdf_text``, ``.docx`` goes to ``get_docx_text``, and every other
	    extension falls through to ``get_csv_text``.

	    Args:
	        file_paths: Iterable of path strings.

	    Returns:
	        The extracted texts joined in input order with no separator; an
	        empty string when *file_paths* is empty.
	    """
	    parts = []
	    for file_path in file_paths:
	        # Normalize case so upper-case extensions do not fall into the
	        # catch-all branch.
	        file_extension = os.path.splitext(file_path)[1].lower()
	        if file_extension == ".pdf":
	            parts.append(get_pdf_text(file_path))
	        elif file_extension == ".docx":
	            parts.append(get_docx_text(file_path))
	        else:
	            parts.append(get_csv_text(file_path))
	    return "".join(parts)

	def get_pdf_text(pdf_path):
	    """Return the extracted text of every page of the PDF at *pdf_path*.

	    Pages are read in document order and their text is concatenated with
	    no separator.
	    """
	    reader = PdfReader(pdf_path)
	    return "".join(page.extract_text() for page in reader.pages)

	def get_docx_text(docx_path):
	    """Return the paragraph texts of the document at *docx_path*.

	    The text of each paragraph is taken in order and the pieces are joined
	    with a single space.
	    """
	    document = docx.Document(docx_path)
	    return ' '.join(paragraph.text for paragraph in document.paragraphs)

	def get_csv_text(csv_path):
	    """Return placeholder text for the CSV file at *csv_path*.

	    CSV parsing is not implemented yet: *csv_path* is ignored and a fixed
	    placeholder string is returned.
	    """
	    placeholder_text = "CSV content goes here"
	    return placeholder_text

	def get_text_chunks(text):
	    """Split *text* into chunks with a ``CharacterTextSplitter``.

	    The splitter separates on newlines and is configured with a chunk size
	    of 900 and an overlap of 100, both measured with ``len``.

	    Returns:
	        Whatever ``split_text`` returns for *text*.
	    """
	    splitter_options = {
	        "separator": "\n",
	        "chunk_size": 900,
	        "chunk_overlap": 100,
	        "length_function": len,
	    }
	    return CharacterTextSplitter(**splitter_options).split_text(text)

	def get_vectorstore(text_chunks):
	    """Build and return a FAISS store over *text_chunks*.

	    The chunks are embedded with a default-constructed
	    ``HuggingFaceEmbeddings`` instance.
	    """
	    embedder = HuggingFaceEmbeddings()
	    return FAISS.from_texts(text_chunks, embedder)

	def initialize_conversation_chain():
	    """Build the retrieval chain and store it in the module global.

	    Reads the files listed in ``file_paths``, splits the text into chunks,
	    indexes the chunks in a vector store, and wires a ``gpt-3.5-turbo``
	    chat model plus a buffer memory into a ``ConversationalRetrievalChain``.
	    The result is assigned to the global ``conversation_chain``; nothing is
	    returned.
	    """
	    global conversation_chain

	    document_text = read_files(file_paths)
	    index = get_vectorstore(get_text_chunks(document_text))

	    chat_model = ChatOpenAI(
	        openai_api_key=openai_api_key,
	        model_name='gpt-3.5-turbo',
	        temperature=0.1,
	    )
	    history = ConversationBufferMemory(
	        memory_key='chat_history',
	        return_messages=True,
	    )

	    conversation_chain = ConversationalRetrievalChain.from_llm(
	        llm=chat_model,
	        retriever=index.as_retriever(),
	        memory=history,
	    )

	@app.route('/process_files', methods=['POST'])
	def process_files():
	    """Handle POST /process_files.

	    Builds the conversation chain if it has not been built yet, then
	    responds with a JSON success message.
	    """
	    # Only a read of the module global is needed here;
	    # initialize_conversation_chain() does the rebinding itself.
	    chain_missing = conversation_chain is None
	    if chain_missing:
	        initialize_conversation_chain()

	    payload = {"message": "Files processed successfully."}
	    return jsonify(payload)

	@app.route('/ask_question', methods=['POST'])
	def ask_question():
	    """Handle POST /ask_question.

	    Reads the ``question`` form field and runs it through the conversation
	    chain. Responds with HTTP 400 and a JSON error when the question is
	    missing or empty, or when the chain has not been initialized.
	    Otherwise responds with the ``content`` of the last ``chat_history``
	    entry in the chain's result.
	    """
	    user_question = request.form.get("question")
	    if not user_question:
	        return jsonify({"error": "Please provide a question."}), 400

	    chain = conversation_chain
	    if chain is None:
	        not_ready = "Conversation chain not initialized. Please process files first."
	        return jsonify({"error": not_ready}), 400

	    result = chain({'question': user_question})
	    latest_message = result['chat_history'][-1]
	    return jsonify({"response": latest_message.content})

	def handle_user_input(user_question):
	    """Ask the shared conversation chain *user_question* and return its reply.

	    Args:
	        user_question: The question text passed to the chain under the
	            ``question`` key.

	    Returns:
	        The ``content`` of the last entry in the ``chat_history`` of the
	        chain's response.

	    Raises:
	        RuntimeError: If the conversation chain has not been initialized.
	    """
	    if conversation_chain is None:
	        raise RuntimeError(
	            "Conversation chain not initialized. Please process files first."
	        )
	    # Call the chain directly, as ask_question does. LangChain chains do not
	    # implement the context-manager protocol, so wrapping the call in
	    # ``with conversation_chain:`` fails before the chain is ever invoked.
	    response = conversation_chain({'question': user_question})
	    return response['chat_history'][-1].content

	if __name__ == '__main__':
	    # Debug mode turns on the interactive Werkzeug debugger, which lets
	    # anyone who can reach the server execute arbitrary code. Keep it off
	    # by default and enable it only when FLASK_DEBUG is explicitly set.
	    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
	    app.run(debug=debug_enabled)