from flask import Flask, request, jsonify, send_from_directory
import requests
import os
from dotenv import load_dotenv
import logging

load_dotenv()

app = Flask(__name__)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
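
# Configuration via environment variables (loaded from .env):
#   HUGGINGFACE_API_KEY - Inference API token used in the Authorization header
#   HF_REPO_ID          - Space repo id hosting the PDF (e.g. "user/my-space", illustrative)
#   PDF_FILENAME        - name of the PDF under data/ in that repo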

API_URL = "https://api-inference.huggingface.co/models/"
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
HF_REPO_ID = os.getenv('HF_REPO_ID')
PDF_FILENAME = os.getenv('PDF_FILENAME')

def query(payload, model):
    """POST a JSON payload to the Hugging Face Inference API for `model`."""
    try:
        response = requests.post(API_URL + model, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Error querying model {model}: {str(e)}")
        # Callers check for this "error" key to detect failures
        return {"error": str(e)}
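
# Rough sketch of the shapes callers can expect back (model-dependent, so
# treat these as assumptions rather than a contract):
#   text2text-generation (flan-t5): [{"generated_text": "..."}]
#   feature-extraction (MiniLM):    a (possibly nested) list of floats
#   any failure:                    {"error": "..."}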

def process_pdf():
    """Ask a document-QA model to summarize the PDF hosted in the Space repo."""
    pdf_url = f"https://huggingface.co/spaces/{HF_REPO_ID}/resolve/main/data/{PDF_FILENAME}"
    logger.info(f"Attempting to process PDF at URL: {pdf_url}")

    # Note: layoutlm-document-qa is a document *image* QA model, so a raw
    # multi-page PDF URL may not be accepted directly by the API
    model = "impira/layoutlm-document-qa"
    payload = {
        "inputs": {
            "question": "Summarize the main points of this document.",
            "image": pdf_url
        }
    }

    response = query(payload, model)
    logger.info(f"PDF processing response: {response}")

    # Document-QA responses usually arrive as a list of answer dicts,
    # e.g. [{"answer": ..., "score": ...}]; handle both that and a bare dict
    if isinstance(response, dict) and 'error' in response:
        return f"Error processing PDF: {response['error']}"
    elif isinstance(response, list) and response and 'answer' in response[0]:
        return response[0]['answer']
    elif isinstance(response, dict) and 'answer' in response:
        return response['answer']
    else:
        return str(response)

# Process PDF and get summary
pdf_summary = process_pdf()
logger.info(f"PDF Summary: {pdf_summary}")

# Get an embedding for the summary (skipped if PDF processing failed)
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
if pdf_summary.startswith("Error"):
    summary_embedding = None
else:
    embedding_response = query({"inputs": pdf_summary}, embedding_model)
    # query() returns an {"error": ...} dict on failure; only index on success
    summary_embedding = embedding_response[0] if 'error' not in embedding_response else None

@app.route('/')
def home():
    return send_from_directory('.', 'index.html')
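
# Similarity helper used by /ask below: for sentence-transformers/all-MiniLM-L6-v2
# the embeddings are typically L2-normalized, in which case this plain dot
# product is equivalent to cosine similarity
def dot_similarity(a, b):
    return sum(x * y for x, y in zip(a, b))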

@app.route('/ask', methods=['POST'])
def ask():
    prompt = request.json['question']

    if summary_embedding is None:
        return jsonify({'response': "I'm sorry, but I couldn't process the PDF correctly. Please check the PDF file and try again later."})

    # Get an embedding for the question
    embedding_response = query({"inputs": prompt}, embedding_model)
    if isinstance(embedding_response, dict) and 'error' in embedding_response:
        return jsonify({'response': f"Error embedding the question: {embedding_response['error']}"})
    query_embedding = embedding_response[0]

    # Compare the question to the summary (see dot_similarity above); the
    # score is currently only logged, not used to gate the answer
    similarity = dot_similarity(query_embedding, summary_embedding)
    logger.info(f"Question/summary similarity: {similarity}")

    # Generate a response with a FLAN-T5 model, conditioning on the summary
    generator_model = "google/flan-t5-base"
    input_text = f"Context: {pdf_summary}\n\nQuestion: {prompt}\n\nAnswer:"
    generation = query({"inputs": input_text}, generator_model)
    if isinstance(generation, dict) and 'error' in generation:
        return jsonify({'response': f"Error generating a response: {generation['error']}"})
    response = generation[0]["generated_text"]

    return jsonify({'response': response})

if __name__ == '__main__':
    # debug=True is for local development only; a hosted deployment typically
    # needs host="0.0.0.0" and the platform's expected port instead
    app.run(debug=True)