File size: 4,114 Bytes
176bc9a
b5553ae
 
 
219d24c
 
 
 
 
b5553ae
 
176bc9a
 
 
b5553ae
 
219d24c
b5553ae
 
 
 
176bc9a
219d24c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176bc9a
 
 
 
 
 
 
 
219d24c
b5553ae
 
219d24c
 
 
 
 
 
b5553ae
219d24c
b5553ae
176bc9a
 
 
 
b5553ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import base64
import json
import os
import traceback
from io import BytesIO

from dotenv import load_dotenv
from flask import Flask, request, jsonify, send_from_directory
from pdf2image import convert_from_bytes
from PIL import Image
import requests

# Load HF credentials / repo ID from a local .env file into the environment.
load_dotenv()

app = Flask(__name__)

# Base URL of the Hugging Face hosted Inference API; the model ID is appended
# per request in query(). (The original line was corrupted by HTML/URL-escaping
# residue — "/static-proxy?url=...%3C%2Fspan%3E" — and was not even a closed
# string literal; restored to the canonical endpoint.)
API_URL = "https://api-inference.huggingface.co/models/"
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
HF_REPO_ID = os.getenv('HF_REPO_ID')  # Your Hugging Face repo ID (e.g. "user/space")

def query(payload, model, *, timeout=60):
    """POST `payload` to the HF Inference API for `model` and return parsed JSON.

    Args:
        payload: JSON-serializable request body (e.g. {"inputs": ...}).
        model: Model ID appended to API_URL (e.g. "google/flan-t5-base").
        timeout: Seconds before the HTTP request is aborted. The original call
            had no timeout and could hang indefinitely on a stalled connection.

    Returns:
        The decoded JSON response. Note: on API errors this is typically a
        dict like {"error": ...} rather than the model output — callers check
        the shape of the result.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.post(API_URL + model, headers=headers, json=payload, timeout=timeout)
    return response.json()

# Process PDFs using Hugging Face's PDF processing model
def process_pdfs():
    """Download the configured PDF, render each page, and summarize it.

    Fetches the PDF from the HF Space identified by HF_REPO_ID, converts every
    page to a PNG, sends each page image (base64-encoded) to a vision model,
    and joins the per-page descriptions into one summary string.

    Returns:
        str: concatenated page summaries, or a string starting with
        "Error" on any failure (callers test `.startswith("Error")`).
    """
    pdf_url = f"https://huggingface.co/spaces/{HF_REPO_ID}/resolve/main/data/your_pdf_file.pdf"

    try:
        # Download PDF (bounded wait so a dead host can't hang startup forever).
        pdf_response = requests.get(pdf_url, timeout=60)
        pdf_response.raise_for_status()  # This will raise an exception for HTTP errors

        print(f"PDF downloaded successfully. Content length: {len(pdf_response.content)} bytes")

        # Convert PDF to images (requires poppler to be installed on the host).
        images = convert_from_bytes(pdf_response.content)
        print(f"Converted PDF to {len(images)} images")

        # Process each page image through the vision model.
        # NOTE(review): vit-base-patch16-224 is an image-classification model;
        # it's unclear it accepts an {"image", "text"} prompt payload or returns
        # 'generated_text' — confirm against the model card. Failures fall
        # through to the "Error processing page" branch below.
        vision_model = "google/vit-base-patch16-224"
        summaries = []

        for i, image in enumerate(images):
            # Convert the rendered page to base64-encoded PNG bytes.
            buffered = BytesIO()
            image.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()

            # Process image with vision model
            payload = {
                "inputs": [
                    {
                        "image": img_str,
                        "text": "Describe the content of this image in detail."
                    }
                ]
            }
            response = query(payload, vision_model)
            # `json` was referenced here without being imported in the original
            # file (NameError); the import is now at the top of the module.
            print(f"Page {i+1} processing response:", json.dumps(response, indent=2))

            if isinstance(response, list) and len(response) > 0 and 'generated_text' in response[0]:
                summaries.append(response[0]['generated_text'])
            else:
                summaries.append(f"Error processing page {i+1}")

        return " ".join(summaries)
    except Exception as e:
        # Broad catch is deliberate: this runs at import time and the app must
        # still start (with a degraded chatbot) if PDF processing fails.
        print(f"Error in process_pdfs: {str(e)}")
        print(traceback.format_exc())
        return f"Error processing PDF: {str(e)}"

# Get the summary of PDFs
# NOTE: this runs at import time, so module import blocks on network calls
# (PDF download + per-page vision requests).
pdf_summary = process_pdfs()
print("PDF Summary:", pdf_summary)

# Get embeddings for the summary
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
# process_pdfs() signals failure by returning a string starting with "Error".
if not pdf_summary.startswith("Error"):
    try:
        # The feature-extraction API returns a list; [0] is the embedding
        # vector for the single input — presumably a list of floats; verify
        # against the HF Inference API response shape.
        summary_embedding = query({"inputs": pdf_summary}, embedding_model)[0]
        print("Successfully created summary embedding")
    except Exception as e:
        # Degrade gracefully: the app still serves, just without similarity.
        print(f"Error getting embedding: {str(e)}")
        print(traceback.format_exc())
        summary_embedding = None
else:
    print("Skipping embedding due to PDF processing error")
    summary_embedding = None

if summary_embedding is None:
    print("WARNING: summary_embedding is None. The chatbot will not be able to provide meaningful responses.")

@app.route('/')
def home():
    """Serve the chat front-end's index page from the app's working directory."""
    index_page = send_from_directory('.', 'index.html')
    return index_page

@app.route('/ask', methods=['POST'])
def ask():
    """Answer a user's question in character, grounded in the PDF summary.

    Expects a JSON body of the form {"question": "..."} and returns
    {"response": "..."}. Returns HTTP 400 (instead of an unhandled
    KeyError/500 as before) when the body is missing or has no question.
    """
    body = request.get_json(silent=True) or {}
    prompt = body.get('question')
    if not prompt:
        return jsonify({'error': "Request body must be JSON with a 'question' field"}), 400

    # NOTE(review): the original computed an embedding dot-product between the
    # question and summary_embedding but never used the result — and it raised
    # TypeError whenever summary_embedding was None (the exact case the module
    # warns about at startup). The dead network call and computation are removed.

    # Generate character response
    character_traits = "I am a knowledgeable and friendly AI assistant based on the content of the provided PDFs."
    input_text = f"Character: {character_traits}\nContext: {pdf_summary}\nUser: {prompt}\nCharacter's response:"

    generator_model = "google/flan-t5-base"
    result = query({"inputs": input_text}, generator_model)
    try:
        response = result[0]["generated_text"]
    except (IndexError, KeyError, TypeError):
        # HF error payloads are dicts like {"error": ...}; surface them
        # instead of crashing with a bare 500.
        return jsonify({'error': 'Text generation failed', 'details': result}), 502

    return jsonify({'response': response})

# Entry point: run Flask's built-in development server.
# debug=True enables the reloader/debugger — not suitable for production.
if __name__ == '__main__':
    app.run(debug=True)