|
|
|
import pandas as pd |
|
import json |
|
import gradio as gr |
|
from pathlib import Path |
|
from ragatouille import RAGPretrainedModel |
|
from gradio_client import Client |
|
from tempfile import NamedTemporaryFile |
|
from sentence_transformers import CrossEncoder |
|
import numpy as np |
|
from time import perf_counter |
|
from sentence_transformers import CrossEncoder |
|
|
|
|
|
from backend.semantic_search import table, retriever |
|
|
|
import logging

# Name of the column passed to `table.search` as the vector column.
VECTOR_COLUMN_NAME = "vector"
# Name of the column read from each search hit to obtain the passage text.
TEXT_COLUMN_NAME = "text"

# Working directory of the process at import time.
proj_dir = Path.cwd()

# INFO-level logging for the whole process, plus this module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Gradio client for the hosted Qwen chat demo that writes the quiz JSON.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")
|
|
|
def system_instructions(question_difficulty, topic, documents_str):
    """Build the instruction prompt that asks the model for a 10-question quiz.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Topic the questions must be about.
        documents_str: Source passages the questions must be drawn from.

    Returns:
        The prompt wrapped in ``<s> [INST] ... [/INST]`` markers. It tells the
        model to answer as JSON keyed ``Q#`` (question), ``Q#:C1``..``Q#:C4``
        (choices) and ``A#`` (the key of the correct choice).
    """
    # Adjacent literals are concatenated at compile time; only the pieces that
    # interpolate a parameter are f-strings.
    prompt = (
        "<s> [INST] You are a great teacher and your task is to create "
        f"10 questions with 4 choices with {question_difficulty} difficulty "
        f'about the topic request "{topic}" only from the below given documents, '
        f"{documents_str}. Then create answers. "
        'Index in JSON format, the questions as "Q#":"" to "Q#":"", '
        'the four choices as "Q#:C1":"" to "Q#:C4":"", '
        'and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". '
        "Example: 'A10':'Q10:C3' [/INST]"
    )
    return prompt
|
|
|
|
|
# Most recently parsed quiz JSON; rebound by generate_quiz and read back by
# compare_answers when scoring.
quiz_data = None

# Holder whose `.value` stores the ColBERT index object used for retrieval.
RAG_db = gr.State()
|
|
|
|
|
|
|
# Column headers of the exported quiz sheet, in output order.
_EXCEL_COLUMNS = [
    "Question Text",
    "Question Type",
    "Option 1",
    "Option 2",
    "Option 3",
    "Option 4",
    "Option 5",
    "Correct Answer",
    "Time in seconds",
    "Image Link",
]


def _quiz_row(output_json, number):
    """Build one spreadsheet row for question ``number`` of the quiz JSON."""
    question_key = f"Q{number}"
    answer_ref = output_json.get(f"A{number}", '')

    # The answer is a reference such as "Q3:C2"; the sheet stores only the
    # option number ("2"). Anything that is not a non-empty string yields ''.
    if isinstance(answer_ref, str) and answer_ref:
        correct_answer = answer_ref.split(':')[-1].replace('C', '').strip()
    else:
        correct_answer = ''

    # Always five option cells; choices missing from the JSON stay blank.
    options = [
        output_json.get(f"{question_key}:C{option_num}", '')
        for option_num in range(1, 6)
    ]

    return [
        output_json.get(question_key, ''),
        "Multiple Choice",
        *options,
        correct_answer,
        30,
        '',
    ]


def json_to_excel(output_json):
    """Write the quiz JSON to a temporary ``.xlsx`` file and return its path.

    Args:
        output_json: Mapping with ``Q#`` question texts, ``Q#:C#`` choice texts
            and ``A#`` answer references (``"Q#:C#"``) for questions 1-10.

    Returns:
        Path of the written spreadsheet. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    gr.Warning('Generating Shareable file link..', duration=30)

    rows = [_quiz_row(output_json, number) for number in range(1, 11)]
    df = pd.DataFrame(rows, columns=_EXCEL_COLUMNS)

    # Reserve a unique path and close the handle before pandas writes to it:
    # an open handle leaks a descriptor and blocks the write on Windows.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name

    df.to_excel(excel_path, index=False)
    return excel_path
|
|
|
# Hue settings handed to the default Gradio theme for this app.
_THEME_HUES = {
    "primary_hue": "cyan",
    "secondary_hue": "yellow",
    "neutral_hue": "purple",
}

colorful_theme = gr.themes.Default(**_THEME_HUES)
|
|
|
|
|
# Number of questions the prompt asks for and the UI has radio slots for.
_NUM_QUESTIONS = 10

# BGE cross-encoder, created on first use and reused by later clicks.
_bge_reranker = None


def _numbered_questions(quiz):
    """Yield ``(question, choices, answer)`` for every usable quiz entry.

    An entry is usable when ``Q#`` has text and ``A#`` references a choice key
    that exists in ``quiz``. Both the quiz display and the scoring iterate
    through this helper so that radio N always lines up with answer N.
    """
    for number in range(1, _NUM_QUESTIONS + 1):
        question_key = f"Q{number}"
        question = quiz.get(question_key)
        answer_ref = quiz.get(f"A{number}")
        # A#, e.g. "Q3:C2", names the key holding the correct choice text.
        answer = quiz.get(answer_ref) if isinstance(answer_ref, str) else None
        if not question or not answer:
            continue
        choices = [
            quiz.get(f"{question_key}:C{choice_num}", "Choice not found")
            for choice_num in range(1, 5)
        ]
        yield question, choices, answer


def _parse_quiz_json(response_text):
    """Return the JSON object embedded in ``response_text``, or ``None``."""
    if not isinstance(response_text, str):
        return None
    # The model may wrap the JSON in prose; keep the outermost {...} span.
    start_index = response_text.find('{')
    end_index = response_text.rfind('}')
    if start_index == -1 or end_index < start_index:
        return None
    cleaned_response = response_text[start_index:end_index + 1]
    logger.info("Cleaned response: %s", cleaned_response)
    try:
        parsed = json.loads(cleaned_response)
    except json.JSONDecodeError as err:
        logger.warning("Failed to decode JSON: %s", err)
        return None
    return parsed if isinstance(parsed, dict) else None


with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:

    # Header: logo on the left, title banner on the right.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
            <center>
            <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span> STUDENTS QUIZBOT </h1>
            <h2>Generative AI-powered Capacity building for STUDENTS</h2>
            <i>⚠️ Students can create quiz from any topic from 10 science and evaluate themselves! ⚠️</i>
            </center>
            """)

    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any CHAPTER NAME")

    with gr.Row():
        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
        model_radio = gr.Radio(
            choices=['(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
            value='(ACCURATE) BGE reranker',
            label="Embeddings",
            info="First query to ColBERT may take a little time",
        )

    generate_quiz_btn = gr.Button("Generate Quiz!🚀")
    quiz_msg = gr.Textbox()

    # One hidden radio per question slot; generate_quiz reveals them.
    question_radios = [gr.Radio(visible=False) for _ in range(_NUM_QUESTIONS)]

    @generate_quiz_btn.click(
        inputs=[difficulty_radio, topic, model_radio],
        outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")],
    )
    def generate_quiz(question_difficulty, topic, cross_encoder):
        """Retrieve passages for ``topic``, ask the model for a quiz, show it.

        Args:
            question_difficulty: Difficulty label chosen in the UI.
            topic: Free-text topic used as the retrieval query.
            cross_encoder: Selected retrieval mode label (ColBERT or BGE).

        Returns:
            A list with exactly one entry per declared output: the status
            message, ``_NUM_QUESTIONS`` radio updates (unused slots hidden),
            and the path of the generated Excel file.

        Raises:
            gr.Error: If the topic is empty, the model call fails, or the
                response holds no usable quiz. The displayed quiz and
                ``quiz_data`` are left unchanged in that case.
        """
        global quiz_data, _bge_reranker

        if not topic or not topic.strip():
            raise gr.Error("Please enter a topic for the quiz.")

        top_k_rank = 10
        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)

        if cross_encoder == '(HIGH ACCURATE) ColBERT':
            gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
            # Load the model and index once; later clicks reuse RAG_db.value.
            if RAG_db.value is None:
                RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
                RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
            documents_full = RAG_db.value.search(topic, k=top_k_rank)
            documents = [item['content'] for item in documents_full]
        else:
            query_vec = retriever.encode(topic)
            hits = (
                table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME)
                .limit(top_k_rank)
                .to_list()
            )
            documents = [hit[TEXT_COLUMN_NAME] for hit in hits]

            # Rerank only when there is something to score.
            if documents and cross_encoder == '(ACCURATE) BGE reranker':
                if _bge_reranker is None:
                    _bge_reranker = CrossEncoder('BAAI/bge-reranker-base')
                cross_scores = _bge_reranker.predict([[topic, doc] for doc in documents])
                best_first = np.argsort(cross_scores)[::-1]
                documents = [documents[idx] for idx in best_first[:top_k_rank]]

        formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
        logger.info("Formatted prompt: %s", formatted_prompt)

        # UI boundary: any failure of the remote call or an unexpected response
        # shape becomes a user-visible error instead of a bad return value.
        try:
            response = client.predict(
                query=formatted_prompt,
                history=[],
                system="You are a helpful assistant.",
                api_name="/model_chat",
            )
            response_text = response[1][0][1]
        except Exception as err:
            logger.exception("Quiz generation request failed")
            raise gr.Error("The quiz model did not return a usable response. Please try again.") from err

        output_json = _parse_quiz_json(response_text)
        if output_json is None:
            raise gr.Error("Could not read the generated quiz. Please try again.")

        radios = [
            # value=None clears any selection left over from a previous quiz.
            gr.Radio(choices=choices, label=question, value=None, visible=True, interactive=True)
            for question, choices, _ in _numbered_questions(output_json)
        ]
        if not radios:
            raise gr.Error("The generated quiz had no usable questions. Please try again.")

        quiz_data = output_json
        excel_file = json_to_excel(output_json)

        # Gradio needs one value per declared output, so hide unused slots.
        radios += [gr.Radio(visible=False) for _ in range(_NUM_QUESTIONS - len(radios))]
        return ['Quiz Generated!'] + radios + [excel_file]

    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()

    @check_button.click(inputs=question_radios, outputs=score_textbox)
    def compare_answers(*user_answers):
        """Score the selected choices against the stored quiz answers.

        Args:
            *user_answers: Selected value of each question radio, in slot
                order (``None`` where nothing is selected).

        Returns:
            A Markdown message with the score, or a prompt to generate a quiz
            when none is stored.
        """
        if not quiz_data:
            return "### Please generate a quiz first!"

        correct_answers = [answer for _, _, answer in _numbered_questions(quiz_data)]
        total = len(correct_answers)

        # Compare slot by slot: a pick only counts for its own question.
        score = sum(
            1
            for given, expected in zip(user_answers, correct_answers)
            if given == expected
        )

        if score > 7:
            message = f"### Excellent! You got {score} out of {total}!"
        elif score > 5:
            message = f"### Good! You got {score} out of {total}!"
        else:
            message = f"### You got {score} out of {total}! Don't worry. You can prepare well and try better next time!"

        return message
|
|
|
# Turn on the request queue, then start the app in debug mode.
QUIZBOT.queue().launch(debug=True)
|
|
|
|