Commit b5553ae
Parent: e5a2f3a
user committed: modifications for remote development using huggingface resources
Files changed:
- app.py +31 -20
- character_generator.py +10 -0
- index.html +17 -44
- model_loader.py +8 -2
- pdf_processor.py +13 -9
- requirements.txt +2 -5
- run.py +4 -0
app.py
CHANGED
@@ -1,16 +1,25 @@
 from flask import Flask, request, jsonify, send_from_directory
-
-
-import
-
+import requests
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
 
 app = Flask(__name__)
 
-
-
-
-
-
+API_URL = "https://api-inference.huggingface.co/models/"
+headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
+
+# Sample text for testing
+sample_text = """
+This is a sample text for testing our RAG chatbot.
+It contains information about artificial intelligence and machine learning.
+AI and ML are revolutionizing various industries and improving efficiency.
+"""
+
+def query(payload, model):
+    response = requests.post(API_URL + model, headers=headers, json=payload)
+    return response.json()
 
 @app.route('/')
 def home():
@@ -20,18 +29,20 @@ def home():
 def ask():
     prompt = request.json['question']
 
-
-
-
-
-
-
-
-
-
-
+    # Use sentence-transformers model for embedding
+    embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
+    context_embedding = query({"inputs": sample_text}, embedding_model)[0]
+    query_embedding = query({"inputs": prompt}, embedding_model)[0]
+
+    # Simple dot product similarity
+    similarity = sum(a*b for a, b in zip(context_embedding, query_embedding))
+
+    # Generate response using T5 model
+    generator_model = "google/flan-t5-small"
+    input_text = f"Context: {sample_text}\n\nQuestion: {prompt}\n\nAnswer:"
+    response = query({"inputs": input_text}, generator_model)[0]["generated_text"]
 
     return jsonify({'response': response})
 
 if __name__ == '__main__':
-    app.run(
+    app.run(debug=True)
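Worth flagging in the new ask() handler: `similarity` is computed with a raw dot product and then never used, and a raw dot product is scale-sensitive. If the score is meant to drive retrieval later, cosine similarity is the more usual choice. A minimal sketch, assuming both embeddings arrive as flat lists of floats (the exact shape of the Inference API feature-extraction response is worth verifying before indexing with [0]):

import math

def cosine_similarity(a, b):
    # Normalized dot product; insensitive to embedding magnitude.
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0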
character_generator.py
ADDED
@@ -0,0 +1,10 @@
+def generate_character_response(context, prompt):
+    # Define character traits based on your PDF content
+    character_traits = "I am a knowledgeable and friendly AI assistant based on the content of the provided PDF."
+
+    input_text = f"Character: {character_traits}\nContext: {context}\nUser: {prompt}\nCharacter's response:"
+
+    # Use your text generation model here
+    response = generator(input_text, max_length=150, num_return_sequences=1)[0]['generated_text']
+
+    return response
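As committed, generate_character_response() calls a `generator` that this file never defines or imports, so it would raise a NameError. One plausible wiring, an assumption rather than part of the commit, is the pipeline already defined in model_loader.py:

# Hypothetical glue, assuming the flan-t5 pipeline from model_loader.py is the
# generator this module expects:
from model_loader import load_generator

generator = load_generator()  # text2text-generation pipeline used above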
index.html
CHANGED
@@ -3,59 +3,32 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>
-    <style>
-        body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
-        #chat-container { border: 1px solid #ddd; height: 400px; overflow-y: scroll; padding: 10px; margin-bottom: 10px; }
-        #user-input { width: 70%; padding: 5px; }
-        #send-button { padding: 5px 10px; }
-    </style>
+    <title>RAG Chatbot</title>
 </head>
 <body>
-    <h1>
+    <h1>RAG Chatbot</h1>
     <div id="chat-container"></div>
     <input type="text" id="user-input" placeholder="Ask a question...">
-    <button
+    <button onclick="sendMessage()">Send</button>
 
     <script>
-        const chatContainer = document.getElementById('chat-container');
-        const userInput = document.getElementById('user-input');
-        const sendButton = document.getElementById('send-button');
-
-        function addMessage(role, content) {
-            const messageDiv = document.createElement('div');
-            messageDiv.innerHTML = `<strong>${role}:</strong> ${content}`;
-            chatContainer.appendChild(messageDiv);
-            chatContainer.scrollTop = chatContainer.scrollHeight;
-        }
-
         async function sendMessage() {
-            const
-
-
-            userInput.value = '';
+            const input = document.getElementById('user-input');
+            const message = input.value;
+            input.value = '';
 
-
-
-
-
-
-
-
-            });
-            const data = await response.json();
-            addMessage('Bot', data.response);
-        } catch (error) {
-            console.error('Error:', error);
-            addMessage('Bot', 'Sorry, there was an error processing your request.');
-        }
-    }
-        }
+            const response = await fetch('/ask', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({ question: message }),
+            });
 
-
-
-
-}
+            const data = await response.json();
+            const chatContainer = document.getElementById('chat-container');
+            chatContainer.innerHTML += `<p><strong>You:</strong> ${message}</p><p><strong>Bot:</strong> ${data.response}</p>`;
+        }
 </script>
 </body>
 </html>
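Note that the rewritten sendMessage() drops the old try/catch, so a failed fetch now surfaces as an unhandled rejection. The page posts JSON to /ask; for a quick smoke test of the same endpoint without the browser, a sketch using requests (assumes the Flask app is running locally on Flask's default port 5000):

import requests

# Mirror the fetch() call the page makes.
resp = requests.post(
    "http://127.0.0.1:5000/ask",
    json={"question": "What is this text about?"},
)
print(resp.json()["response"])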
model_loader.py
CHANGED
@@ -1,9 +1,15 @@
 from transformers import AutoTokenizer, AutoModel, pipeline
+from huggingface_hub import hf_hub_download
 
 def load_model():
     tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
-    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2", device_map="cpu")
     return tokenizer, model
 
 def load_generator():
-    return pipeline('text2text-generation', model='google/flan-t5-base')
+    return pipeline('text2text-generation', model='google/flan-t5-base', device_map="cpu")
+
+def download_pdf():
+    # Replace 'your_pdf_file.pdf' with the actual name of your PDF file on Hugging Face
+    file_path = hf_hub_download(repo_id="your_username/your_repo_name", filename="your_pdf_file.pdf")
+    return file_path
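download_pdf() ships with placeholder repo_id and filename; once real values are filled in, it pairs naturally with pdf_processor.py. A sketch under that assumption (note hf_hub_download defaults to repo_type="model"; a dataset repo would need repo_type="dataset"):

from model_loader import download_pdf
from pdf_processor import extract_text_from_pdf

# Hypothetical flow: fetch the PDF from the Hub, then extract its text.
pdf_path = download_pdf()
pdf_text = extract_text_from_pdf(pdf_path)
print(pdf_text[:200])  # quick sanity check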
pdf_processor.py
CHANGED
@@ -1,22 +1,26 @@
-import fitz
-import faiss
+import fitz  # PyMuPDF
 import numpy as np
-import
-
+from transformers import AutoTokenizer, AutoModel
+import faiss
 
 def extract_text_from_pdf(file_path):
+    text = ""
     with fitz.open(file_path) as doc:
-
+        for page in doc:
+            text += page.get_text()
+    return text
 
 def process_pdf(pdf_text):
     chunks = [pdf_text[i:i+512] for i in range(0, len(pdf_text), 512)]
-    tokenizer
+    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
+
     embeddings = []
     for chunk in chunks:
-        inputs = tokenizer(chunk,
-
-        outputs = model(**inputs)
+        inputs = tokenizer(chunk, padding=True, truncation=True, max_length=512, return_tensors="pt")
+        outputs = model(**inputs)
         embeddings.append(outputs.last_hidden_state.mean(dim=1).squeeze().numpy())
+
     embeddings = np.array(embeddings)
     index = faiss.IndexFlatL2(embeddings.shape[1])
     index.add(embeddings.astype('float32'))
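As the hunk ends, process_pdf() builds the FAISS index but returns nothing, so callers cannot reach the index or the chunks. A minimal sketch of the retrieval step the index would enable, assuming process_pdf is extended to return `index` and `chunks`:

import numpy as np

def search_chunks(index, chunks, query_embedding, k=3):
    # FAISS expects a 2-D float32 array of query vectors.
    distances, ids = index.search(np.array([query_embedding], dtype="float32"), k)
    return [chunks[i] for i in ids[0]]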
requirements.txt
CHANGED
@@ -1,6 +1,3 @@
 flask
-
-
-PyMuPDF
-faiss-cpu
-numpy
+requests
+python-dotenv
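The trimmed list matches app.py's new Inference-API path, but pdf_processor.py and model_loader.py still import fitz (PyMuPDF), faiss, numpy, transformers, and huggingface_hub. If those modules stay in use, the list would need to keep them; a fuller sketch, inferred from the imports above rather than from the commit:

flask
requests
python-dotenv
PyMuPDF
faiss-cpu
numpy
transformers
torch
huggingface_hub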
run.py
ADDED
@@ -0,0 +1,4 @@
+from app import app
+
+if __name__ == '__main__':
+    app.run(debug=True)