Upload 7 files
- app.py +80 -0
- counseling_model.py +46 -0
- greet_counsel.py +62 -0
- greeting_model.py +18 -0
- medication_classification_model.py +24 -0
- medication_info_model.py +46 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,80 @@
from flask import Flask, render_template, request, jsonify, session
import tensorflow as tf
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
import joblib
import pandas as pd
import datetime

app = Flask(__name__)
app.secret_key = 'hassaanik'  # Necessary for session management

# Load models and tokenizers
counseling_greeting_model = TFT5ForConditionalGeneration.from_pretrained('./models/counseling_greeting_model/saved_model')
counseling_greeting_tokenizer = T5Tokenizer.from_pretrained('./models/counseling_greeting_model/tokenizer')

med_info_model = TFT5ForConditionalGeneration.from_pretrained('./models/medication_info_model/saved_model')
med_info_tokenizer = T5Tokenizer.from_pretrained('./models/medication_info_model/tokenizer')

knn_model = joblib.load('./models/medication_classification_model/knn_model.pkl')
label_encoders = joblib.load('./models/medication_classification_model/label_encoders.pkl')
age_scaler = joblib.load('./models/medication_classification_model/age_scaler.pkl')
medication_encoder = joblib.load('./models/medication_classification_model/medication_encoder.pkl')


@app.route('/')
def index():
    session.clear()  # Clear session when accessing the homepage
    return render_template('index.html')

@app.route('/reset_chat', methods=['POST'])
def reset_chat():
    session.clear()
    return jsonify({'status': 'Chat reset'})

def generate_response(model, tokenizer, input_text, session_key):
    # Prepare input for the model
    encoding = tokenizer(input_text, max_length=500, padding='max_length', truncation=True, return_tensors='tf')
    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']
    # Generate response
    outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_length=512, num_beams=5, early_stopping=True)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Store in session
    if session_key not in session:
        session[session_key] = []
    session[session_key].append({'user': input_text, 'bot': response})
    session.modified = True  # in-place mutation of a session list is not auto-detected by Flask
    return response

@app.route('/counseling_greeting', methods=['POST'])
def counseling_greeting():
    data = request.get_json()
    prompt = data['prompt']
    response = generate_response(counseling_greeting_model, counseling_greeting_tokenizer, f"question: {prompt}", 'counseling_greeting')
    return jsonify({'response': response, 'conversation': session['counseling_greeting']})

@app.route('/medication_info', methods=['POST'])
def medication_info():
    data = request.get_json()
    question = data['question']
    response = generate_response(med_info_model, med_info_tokenizer, f"question: {question}", 'medication_info')
    return jsonify({'response': response, 'conversation': session['medication_info']})

@app.route('/classify_medication', methods=['POST'])
def classify_medication():
    data = pd.DataFrame([request.get_json()])
    # Encode categorical features with the fitted label encoders
    for column in ['Gender', 'Blood Type', 'Medical Condition', 'Test Results']:
        data[column] = label_encoders[column].transform(data[column])
    # Scale 'Age' with the fitted scaler
    data['Age'] = age_scaler.transform(data[['Age']])
    predictions = knn_model.predict(data)
    predicted_medications = medication_encoder.inverse_transform(predictions)
    if 'classify_medication' not in session:
        session['classify_medication'] = []
    # Note: this stores the encoded feature values, not the raw request payload
    session['classify_medication'].append({'user': data.to_dict(), 'bot': predicted_medications[0]})
    session.modified = True
    return jsonify({'medication': predicted_medications[0], 'conversation': session['classify_medication']})


if __name__ == '__main__':
    app.run(debug=True)
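For a quick smoke test without a browser, Flask's built-in test client can hit the JSON endpoints directly. A minimal sketch, assuming this file is importable as app; the prompt text is illustrative:

# Minimal smoke test using Flask's test client (no running server needed).
from app import app

with app.test_client() as client:
    r = client.post('/counseling_greeting', json={'prompt': 'Hello, how are you?'})
    print(r.get_json()['response'])

    # Clear the stored conversation between runs
    client.post('/reset_chat')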
counseling_model.py
ADDED
@@ -0,0 +1,46 @@
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
import tensorflow as tf

# Load the model and tokenizer
model = TFT5ForConditionalGeneration.from_pretrained('models/counsel_model/saved_model')
tokenizer = T5Tokenizer.from_pretrained('models/counsel_model/tokenizer')

def generate_answer(question):
    input_text = f"question: {question}"
    encoding = tokenizer(
        input_text,
        max_length=500,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )

    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']

    generated_text = ""
    max_length = 512
    current_input_ids = input_ids

    # Generate in a loop: if the decoded chunk fills the length budget,
    # feed it back in and keep going; otherwise stop. (Word count serves
    # as a rough proxy for token count here.)
    while True:
        outputs = model.generate(
            input_ids=current_input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2,
            return_dict_in_generate=True,
            output_scores=True
        )

        text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
        generated_text += text

        if len(text.split()) < max_length:
            break

        current_input_ids = tokenizer.encode(text, return_tensors='tf')
        attention_mask = tf.ones_like(current_input_ids)

    return generated_text
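Illustrative usage of the helper, assuming the fine-tuned weights exist under models/counsel_model/ (the sample question is taken from the test list in greet_counsel.py):

if __name__ == '__main__':
    # Sample question; any free-form counseling question works here.
    print(generate_answer('What does it mean to have a mental illness?'))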
greet_counsel.py
ADDED
@@ -0,0 +1,62 @@
from transformers import T5Tokenizer, TFT5ForConditionalGeneration, AutoTokenizer, TFAutoModelForSeq2SeqLM
import tensorflow as tf

# Load both models and tokenizers
model1 = TFT5ForConditionalGeneration.from_pretrained('models/counsel_model/saved_model')
tokenizer1 = T5Tokenizer.from_pretrained('models/counsel_model/tokenizer')

model2 = TFAutoModelForSeq2SeqLM.from_pretrained('models/greeting_model/saved_model')
tokenizer2 = AutoTokenizer.from_pretrained('models/greeting_model/saved_model')

def ensemble_generate(question):
    # Prepare the input for Model 1 (counseling)
    input_text1 = f"question: {question}"
    encoding1 = tokenizer1(
        input_text1,
        max_length=500,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )

    input_ids1 = encoding1['input_ids']
    attention_mask1 = encoding1['attention_mask']

    # Generate output from Model 1
    outputs1 = model1.generate(
        input_ids=input_ids1,
        attention_mask=attention_mask1,
        max_length=512,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=2,
        return_dict_in_generate=True,
        output_scores=True
    )

    generated_text1 = tokenizer1.decode(outputs1.sequences[0], skip_special_tokens=True)

    # Prepare the input for Model 2 (greeting)
    input_ids2 = tokenizer2.encode(question, return_tensors='tf')

    # Generate output from Model 2
    outputs2 = model2.generate(input_ids2, max_length=500, num_beams=4, early_stopping=True)
    generated_text2 = tokenizer2.decode(outputs2[0], skip_special_tokens=True)

    # Ensemble strategy: simple concatenation of both responses
    final_response = f"Model 1 Response: {generated_text1}\nModel 2 Response: {generated_text2}"

    return final_response

# Test the ensemble with some questions
test_questions = [
    'What does it mean to have a mental illness?',
    'What are some of the warning signs of mental illness?',
    'What is the Information Technology syllabus?',
    'How are you? How is your day?',
    'Is anyone there?',
]

for question in test_questions:
    print(f"Question: {question}")
    print(f"Ensembled Answer: {ensemble_generate(question)}\n")
greeting_model.py
ADDED
@@ -0,0 +1,18 @@
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM

# Load the fine-tuned model and tokenizer
model = TFAutoModelForSeq2SeqLM.from_pretrained('models/greeting_model/saved_model')
tokenizer = AutoTokenizer.from_pretrained('models/greeting_model/saved_model')

def generate_response(input_text, max_length=500):
    # Tokenize the input text
    input_ids = tokenizer.encode(input_text, return_tensors='tf')

    # Generate the response from the model
    outputs = model.generate(input_ids, max_length=max_length, num_beams=4, early_stopping=True)

    # Decode the generated tokens back to text
    decoded_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return decoded_response
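Illustrative usage, assuming the fine-tuned greeting model exists under models/greeting_model/:

if __name__ == '__main__':
    # Sample greeting; max_length falls back to the 500-token default.
    print(generate_response('How are you? How is your day?'))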
medication_classification_model.py
ADDED
@@ -0,0 +1,24 @@
import joblib
import pandas as pd

def predict_medication(new_data):
    # Load the model, encoders, and scaler
    knn = joblib.load('models/medication_classification_model/knn_model.pkl')
    label_encoders = joblib.load('models/medication_classification_model/label_encoders.pkl')
    age_scaler = joblib.load('models/medication_classification_model/age_scaler.pkl')
    medication_encoder = joblib.load('models/medication_classification_model/medication_encoder.pkl')

    # Encode the new data using the saved label encoders
    for column in ['Gender', 'Blood Type', 'Medical Condition', 'Test Results']:
        new_data[column] = label_encoders[column].transform(new_data[column])

    # Normalize the 'Age' column in the new data
    new_data['Age'] = age_scaler.transform(new_data[['Age']])

    # Make predictions
    predictions = knn.predict(new_data)

    # Decode the predictions back to the original medication names
    predicted_medications = medication_encoder.inverse_transform(predictions)

    return predicted_medications
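A minimal sketch of the expected input: a single-row DataFrame with the five feature columns the encoders and scaler were fitted on. The sample values are assumptions; each categorical value must be a label the corresponding encoder saw during training:

import pandas as pd

# Hypothetical patient record; column names match the encoding loop above.
sample = pd.DataFrame([{
    'Age': 45,
    'Gender': 'Male',
    'Blood Type': 'A+',
    'Medical Condition': 'Diabetes',
    'Test Results': 'Normal',
}])

print(predict_medication(sample)[0])  # decoded medication name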
medication_info_model.py
ADDED
@@ -0,0 +1,46 @@
from transformers import T5Tokenizer, TFT5ForConditionalGeneration
import tensorflow as tf

# Load the model and tokenizer
model = TFT5ForConditionalGeneration.from_pretrained('models/medication_info_model/saved_model')
tokenizer = T5Tokenizer.from_pretrained('models/medication_info_model/tokenizer')

def generate_answer(question):
    input_text = f"question: {question}"
    encoding = tokenizer(
        input_text,
        max_length=1024,
        padding='max_length',
        truncation=True,
        return_tensors='tf'
    )

    input_ids = encoding['input_ids']
    attention_mask = encoding['attention_mask']

    generated_text = ""
    max_length = 1024
    current_input_ids = input_ids

    # Generate in a loop: if the decoded chunk fills the length budget,
    # feed it back in and keep going; otherwise stop. (Word count serves
    # as a rough proxy for token count here.)
    while True:
        outputs = model.generate(
            input_ids=current_input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2,
            return_dict_in_generate=True,
            output_scores=True
        )

        text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
        generated_text += text

        if len(text.split()) < max_length:
            break

        current_input_ids = tokenizer.encode(text, return_tensors='tf')
        attention_mask = tf.ones_like(current_input_ids)

    return generated_text
requirements.txt
ADDED
@@ -0,0 +1,5 @@
Flask
transformers
tensorflow
numpy
pandas
joblib
scikit-learn
sentencepiece