"""Flask service that answers questions about a PDF using retrieval + generation.

At import time the module extracts text from ``data/example.pdf``, builds a
chunk list and a vector index from it, and loads an embedding model and a text
generator. ``POST /ask`` embeds the question, retrieves the nearest chunks and
asks the generator to answer using them as context.
"""

from flask import Flask, request, jsonify, send_from_directory
import numpy as np
import torch

from model_loader import load_model, load_generator
from pdf_processor import extract_text_from_pdf, process_pdf

app = Flask(__name__)

# Number of nearest chunks retrieved from the index for each question.
TOP_K = 3

# Load data and models
pdf_text = extract_text_from_pdf("data/example.pdf")
chunks, index = process_pdf(pdf_text)
tokenizer, model = load_model()
generator = load_generator()


@app.route('/')
def home():
    """Serve ``index.html`` from the current working directory."""
    return send_from_directory('.', 'index.html')


@app.route('/ask', methods=['POST'])
def ask():
    """Answer a question using the chunks most similar to it as context.

    Expects a JSON body of the form ``{"question": "<text>"}``.

    Returns:
        A JSON response ``{"response": <generated text>}`` on success, or
        ``{"error": <message>}`` with HTTP status 400 when the body is not a
        JSON object containing a non-empty ``question`` string.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad-input case funnels into the single 400 below.
    payload = request.get_json(silent=True)
    prompt = payload.get('question') if isinstance(payload, dict) else None
    if not isinstance(prompt, str) or not prompt.strip():
        return jsonify({
            'error': "Request body must be JSON with a non-empty 'question' string."
        }), 400

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        # Mean-pool the last hidden state over dim 1 to get one query vector.
        query_vector = (
            model(**inputs).last_hidden_state.mean(dim=1).squeeze().numpy()
        )

    # The index is queried with a single float32 row vector.
    _, indices = index.search(
        query_vector.astype('float32').reshape(1, -1), TOP_K
    )

    # NOTE(review): the index looks FAISS-like; FAISS pads missing neighbours
    # with -1, which would silently select chunks[-1]. Skip negative ids.
    context = " ".join(chunks[i] for i in indices[0] if i >= 0)

    input_text = f"Context: {context}\n\nQuestion: {prompt}\n\nAnswer:"
    # NOTE(review): if `generator` is a causal text-generation pipeline,
    # max_length also counts the prompt tokens, so a long context can leave no
    # room for the answer — confirm against load_generator and consider
    # max_new_tokens instead.
    response = generator(
        input_text, max_length=150, num_return_sequences=1
    )[0]['generated_text']
    return jsonify({'response': response})


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)