from flask import Flask, request, jsonify, send_from_directory
from pdf_processor import extract_text_from_pdf, process_pdf
from model_loader import load_model, load_generator
import torch
import numpy as np

app = Flask(__name__)
# Load data and models
pdf_text = extract_text_from_pdf("data/example.pdf")
chunks, index = process_pdf(pdf_text)
tokenizer, model = load_model()
generator = load_generator()
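# The helpers above are assumed to behave as follows, based on how the returned
# objects are used in the routes below: process_pdf returns a list of text
# chunks plus a FAISS-style index over their embeddings, load_model returns a
# Hugging Face tokenizer/encoder pair, and load_generator returns a
# text-generation pipeline.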

@app.route('/')
def home():
    # Serve the static front-end page
    return send_from_directory('.', 'index.html')

@app.route('/ask', methods=['POST'])
def ask():
    prompt = request.json['question']

    # Embed the question with the encoder and mean-pool the token embeddings
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        query_vector = model(**inputs).last_hidden_state.mean(dim=1).squeeze().numpy()

    # Retrieve the k most similar chunks from the index
    k = 3
    distances, indices = index.search(query_vector.astype('float32').reshape(1, -1), k)
    context = " ".join([chunks[i] for i in indices[0]])

    # Build the prompt and generate an answer from the retrieved context
    input_text = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
    response = generator(input_text, max_length=150, num_return_sequences=1)[0]['generated_text']
    return jsonify({'response': response})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
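
# Example client call (assuming the app is reachable locally on port 7860 and
# the question endpoint is registered at /ask as above; adjust the URL for a
# deployed Space):
#
#   import requests
#   r = requests.post("http://localhost:7860/ask",
#                     json={"question": "What is the document about?"})
#   print(r.json()["response"])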