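"""Flask app for a small Hugging Face Space that answers questions about a PDF.

On startup it asks a document-QA model on the HF Inference API to summarize a
PDF stored in this Space's repo, embeds the summary, and then answers user
questions by prompting FLAN-T5 with that summary as context.
"""
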
from flask import Flask, request, jsonify, send_from_directory
import requests
import os
from dotenv import load_dotenv
import logging

load_dotenv()

app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
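
# Configuration: the hosted Inference API base URL plus settings read from the
# environment (HUGGINGFACE_API_KEY, HF_REPO_ID, and PDF_FILENAME are expected
# as Space secrets or in a local .env file).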
API_URL = "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels%2F%26quot%3B%3C%2Fspan%3E%3C!-- HTML_TAG_END --> | |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"} | |
HF_REPO_ID = os.getenv('HF_REPO_ID') | |
PDF_FILENAME = os.getenv('PDF_FILENAME') | |
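
# Thin wrapper around the Inference API: POST the payload to the model's
# endpoint and return the parsed JSON, or an error dict on any request failure.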
def query(payload, model):
    try:
        response = requests.post(API_URL + model, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error querying model {model}: {str(e)}")
        return {"error": str(e)}
def process_pdf():
    pdf_url = f"https://huggingface.co/spaces/{HF_REPO_ID}/resolve/main/data/{PDF_FILENAME}"
    logger.info(f"Attempting to process PDF at URL: {pdf_url}")
    model = "impira/layoutlm-document-qa"
    payload = {
        "inputs": {
            "question": "Summarize the main points of this document.",
            "image": pdf_url
        }
    }
    response = query(payload, model)
    logger.info(f"PDF processing response: {response}")
    if 'error' in response:
        return f"Error processing PDF: {response['error']}"
    elif 'answer' in response:
        return response['answer']
    else:
        return str(response)

# Process the PDF once at startup and cache the summary
pdf_summary = process_pdf()
logger.info(f"PDF Summary: {pdf_summary}")

# Embed the summary once; skip if PDF processing failed. The feature-extraction
# endpoint returns a list on success and an error dict on failure, so guard
# before indexing.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
embedding_response = query({"inputs": pdf_summary}, embedding_model) if not pdf_summary.startswith("Error") else None
summary_embedding = embedding_response[0] if isinstance(embedding_response, list) else None

@app.route('/')
def home():
    # Serve the static front end from the repo root
    return send_from_directory('.', 'index.html')

@app.route('/ask', methods=['POST'])  # route path assumed; match the endpoint index.html calls
def ask():
    prompt = request.json['question']
    if summary_embedding is None:
        return jsonify({'response': "I'm sorry, but I couldn't process the PDF correctly. Please check the PDF file and try again later."})
    # Get an embedding for the question (indexing assumes the endpoint returns a batch)
    embedding_response = query({"inputs": prompt}, embedding_model)
    if not isinstance(embedding_response, list):
        return jsonify({'response': "Sorry, I couldn't process your question right now. Please try again."})
    query_embedding = embedding_response[0]
    # Dot-product similarity between question and summary embeddings
    # (logged for inspection; not currently used to filter or rank answers)
    similarity = sum(a * b for a, b in zip(query_embedding, summary_embedding))
    logger.info(f"Question/summary similarity: {similarity}")
    # Generate the answer with FLAN-T5, using the summary as context
    generator_model = "google/flan-t5-base"
    input_text = f"Context: {pdf_summary}\n\nQuestion: {prompt}\n\nAnswer:"
    response = query({"inputs": input_text}, generator_model)[0]["generated_text"]
    return jsonify({'response': response})
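
# Example request once the app is running locally (assumes the /ask route above
# and Flask's default port):
#   curl -X POST http://localhost:5000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What is the main topic?"}'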

if __name__ == '__main__':
    app.run(debug=True)