from flask import Flask, request, jsonify, send_from_directory
import requests
import os
import json
from dotenv import load_dotenv
import traceback
from PIL import Image
from pdf2image import convert_from_bytes
import base64
from io import BytesIO

load_dotenv()

app = Flask(__name__)
API_URL = "/static-proxy?url=https%3A%2F%2Fapi-inference.huggingface.co%2Fmodels%2F%26quot%3B%3C%2Fspan%3E%3C!-- HTML_TAG_END --> | |
headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"} | |
HF_REPO_ID = os.getenv('HF_REPO_ID') # Your Hugging Face repo ID | |
def query(payload, model):
    response = requests.post(API_URL + model, headers=headers, json=payload)
    return response.json()
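
# Note: the Inference API may return an error payload (e.g. {"error": "..."} while
# a model is loading) instead of results, so callers should check the response
# shape before indexing into it.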
# Process PDFs: download the file, convert its pages to images, and describe
# each page with a vision model
def process_pdfs():
    pdf_url = f"https://huggingface.co/spaces/{HF_REPO_ID}/resolve/main/data/your_pdf_file.pdf"
    try:
        # Download PDF
        pdf_response = requests.get(pdf_url)
        pdf_response.raise_for_status()  # Raises an exception for HTTP errors
        print(f"PDF downloaded successfully. Content length: {len(pdf_response.content)} bytes")

        # Convert PDF to images
        images = convert_from_bytes(pdf_response.content)
        print(f"Converted PDF to {len(images)} images")

        # Process each image
        vision_model = "google/vit-base-patch16-224"
        summaries = []
        for i, image in enumerate(images):
            # Convert image to base64
            buffered = BytesIO()
            image.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()

            # Process image with vision model
            payload = {
                "inputs": [
                    {
                        "image": img_str,
                        "text": "Describe the content of this image in detail."
                    }
                ]
            }
            response = query(payload, vision_model)
            print(f"Page {i+1} processing response:", json.dumps(response, indent=2))
            if isinstance(response, list) and len(response) > 0 and 'generated_text' in response[0]:
                summaries.append(response[0]['generated_text'])
            else:
                summaries.append(f"Error processing page {i+1}")
        return " ".join(summaries)
    except Exception as e:
        print(f"Error in process_pdfs: {str(e)}")
        print(traceback.format_exc())
        return f"Error processing PDF: {str(e)}"
# Get the summary of the PDFs
pdf_summary = process_pdfs()
print("PDF Summary:", pdf_summary)

# Get embeddings for the summary
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
if not pdf_summary.startswith("Error"):
    try:
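        # Assumption: the feature-extraction endpoint returns the embedding wrapped
        # in an outer list, so [0] selects the vector; some deployments return a
        # flat list of floats instead, in which case the [0] would need to go.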
        summary_embedding = query({"inputs": pdf_summary}, embedding_model)[0]
        print("Successfully created summary embedding")
    except Exception as e:
        print(f"Error getting embedding: {str(e)}")
        print(traceback.format_exc())
        summary_embedding = None
else:
    print("Skipping embedding due to PDF processing error")
    summary_embedding = None

if summary_embedding is None:
    print("WARNING: summary_embedding is None. The chatbot will not be able to provide meaningful responses.")
@app.route('/')  # route decorator restored; root path assumed
def home():
    return send_from_directory('.', 'index.html')
@app.route('/ask', methods=['POST'])  # route decorator restored; path assumed from the function name
def ask():
    prompt = request.json['question']

    # Get embedding for the question
    query_embedding = query({"inputs": prompt}, embedding_model)[0]

    # Calculate similarity
    similarity = sum(a*b for a, b in zip(query_embedding, summary_embedding))
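    # Note: this is an unnormalized dot product rather than cosine similarity,
    # and the value is currently unused below; normalizing both vectors would be
    # needed if a true cosine score is the intent.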
    # Generate character response
    character_traits = "I am a knowledgeable and friendly AI assistant based on the content of the provided PDFs."
    input_text = f"Character: {character_traits}\nContext: {pdf_summary}\nUser: {prompt}\nCharacter's response:"
    generator_model = "google/flan-t5-base"
    response = query({"inputs": input_text}, generator_model)[0]["generated_text"]
    return jsonify({'response': response})
if __name__ == '__main__':
    app.run(debug=True)
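
# Example request, assuming the default Flask development server on port 5000:
#   curl -X POST http://localhost:5000/ask \
#        -H "Content-Type: application/json" \
#        -d '{"question": "What is the PDF about?"}'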