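# Flask app for a Hugging Face Space: summarize a PDF with a document-QA model
# at startup, then answer questions about it through the HF Inference API.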
from flask import Flask, request, jsonify, send_from_directory
import requests
import os
from dotenv import load_dotenv
import logging
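
# Load configuration (HUGGINGFACE_API_KEY, HF_REPO_ID, PDF_FILENAME) from a
# local .env file.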
load_dotenv()
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
API_URL = "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels%2F%26quot%3B%3C%2Fspan%3E
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
HF_REPO_ID = os.getenv('HF_REPO_ID')
PDF_FILENAME = os.getenv('PDF_FILENAME')
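
# Generic helper for the Hugging Face Inference API: POST a JSON payload to
# https://api-inference.huggingface.co/models/<model> and return the decoded
# JSON response, or an {"error": ...} dict if the request fails.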
def query(payload, model):
    try:
        response = requests.post(API_URL + model, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error querying model {model}: {str(e)}")
        return {"error": str(e)}
def process_pdf():
    pdf_url = f"https://huggingface.co/spaces/{HF_REPO_ID}/resolve/main/data/{PDF_FILENAME}"
    logger.info(f"Attempting to process PDF at URL: {pdf_url}")
    model = "impira/layoutlm-document-qa"
    payload = {
        "inputs": {
            "question": "Summarize the main points of this document.",
            "image": pdf_url
        }
    }
    response = query(payload, model)
    logger.info(f"PDF processing response: {response}")
    if 'error' in response:
        return f"Error processing PDF: {response['error']}"
    elif 'answer' in response:
        return response['answer']
    else:
        return str(response)
# Process the PDF and get a summary (runs once at startup)
pdf_summary = process_pdf()
logger.info(f"PDF Summary: {pdf_summary}")

# Get an embedding for the summary; skip it if PDF processing failed, and
# guard against query() returning an {"error": ...} dict instead of a list.
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
summary_embedding = None
if not pdf_summary.startswith("Error"):
    embedding_response = query({"inputs": pdf_summary}, embedding_model)
    if isinstance(embedding_response, list):
        summary_embedding = embedding_response[0]
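
# A minimal sketch (not part of the original app): the /ask route below scores
# similarity with a raw dot product, which is scale-dependent; a normalized
# cosine similarity like this hypothetical helper is the usual alternative.
def cosine_similarity(a, b):
    import math
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    # Avoid division by zero for degenerate (all-zero) vectors
    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0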
@app.route('/')
def home():
    return send_from_directory('.', 'index.html')
@app.route('/ask', methods=['POST'])
def ask():
    prompt = request.json['question']
    if summary_embedding is None:
        return jsonify({'response': "I'm sorry, but I couldn't process the PDF correctly. Please check the PDF file and try again later."})
    # Get an embedding for the question
    query_embedding = query({"inputs": prompt}, embedding_model)[0]
    # Raw dot-product similarity between the question and summary embeddings
    # (computed here but not used in the response below)
    similarity = sum(a * b for a, b in zip(query_embedding, summary_embedding))
    # Generate a response with a T5 model, using the PDF summary as context
    generator_model = "google/flan-t5-base"
    input_text = f"Context: {pdf_summary}\n\nQuestion: {prompt}\n\nAnswer:"
    generation = query({"inputs": input_text}, generator_model)
    if isinstance(generation, dict) and 'error' in generation:
        return jsonify({'response': f"Error generating answer: {generation['error']}"})
    response = generation[0]["generated_text"]
    return jsonify({'response': response})
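
# Example request against the /ask endpoint (hypothetical question text):
#   curl -X POST http://localhost:5000/ask \
#     -H "Content-Type: application/json" \
#     -d '{"question": "What are the main points of the document?"}'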
if __name__ == '__main__':
    app.run(debug=True)
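# Note: when deployed as a Hugging Face Space (Docker), the server typically
# must listen on 0.0.0.0:7860, e.g. app.run(host='0.0.0.0', port=7860).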