Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 25 18:18:27 2023
@author: alish
"""
import random
from functools import lru_cache

import fitz  # PyMuPDF
import gradio as gr
import questiongenerator as qs
from questiongenerator import QuestionGenerator
from sentence_transformers import SentenceTransformer, util
# Module-level question generator; get_sub_text() and pipeline() look up the
# global name `qg` when they split text and generate questions.
qg = QuestionGenerator()
@lru_cache(maxsize=None)
def _load_sentence_model(model_name="paraphrase-MiniLM-L6-v2"):
    """Load a sentence-transformers model once and reuse it on later calls."""
    return SentenceTransformer(model_name)


def _split_sentences(text):
    """Split *text* on '.' and return the stripped, non-empty pieces."""
    return [piece.strip() for piece in text.split('.') if piece.strip()]


def highlight_similar_sentence(text1, text2, color='yellow'):
    """Highlight the sentence of *text2* that best matches a sentence of *text1*.

    Both texts are split on '.'; every sentence of *text1* is compared with
    every sentence of *text2* using cosine similarity of their embeddings, and
    the *text2* sentence of the single best-scoring pair is wrapped in a
    colored ``<span>`` via ``highlight_text``.

    Args:
        text1: Text whose sentences are used as the query.
        text2: Text in which the best-matching sentence is highlighted.
        color: CSS color used for the highlight background.

    Returns:
        *text2* with the best-matching sentence highlighted, or *text2*
        unchanged when either text has no sentences or no pair has a
        similarity above 0.
    """
    sentences_text1 = _split_sentences(text1)
    sentences_text2 = _split_sentences(text2)
    if not sentences_text1 or not sentences_text2:
        # Nothing to compare: skip loading the model altogether.
        return text2

    model = _load_sentence_model()
    # Encode each side once as a batch instead of re-encoding every text2
    # sentence for every text1 sentence.
    embeddings_text1 = model.encode(sentences_text1, convert_to_tensor=True)
    embeddings_text2 = model.encode(sentences_text2, convert_to_tensor=True)
    # Row i holds the similarities of text1 sentence i to all text2 sentences.
    similarities = util.pytorch_cos_sim(embeddings_text1, embeddings_text2).tolist()

    best_sentence = None
    max_similarity = 0.0
    for row in similarities:
        for sentence_text2, similarity in zip(sentences_text2, row):
            # Strict '>' keeps the first best pair in text1-major order.
            if similarity > max_similarity:
                max_similarity = similarity
                best_sentence = sentence_text2

    if best_sentence is None:
        return text2
    return highlight_text(text2, best_sentence, color=color)
def Extract_QA(qlist, selected_extracted_text):
    """Format generated multiple-choice questions and highlight their answers.

    Args:
        qlist: Sequence of question dicts. Each item is read as
            ``item['question']`` plus ``item['answer']``, a list of choice
            dicts with the keys ``'answer'`` (choice text) and ``'correct'``
            (truthy for the right choice).
        selected_extracted_text: Source text in which, for every question,
            the sentence most similar to the correct answer is highlighted.

    Returns:
        A tuple ``(questions, answers, highlighted_text)``: the plain-text
        question list, the HTML answer list, and the source text with one
        highlight per question.

    Raises:
        IndexError: If a question has no choice flagged as correct.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    h_colors = ['yellow', 'red', 'DodgerBlue', 'Orange', 'Violet']
    question_blocks = []
    answer_blocks = []

    for i, entry in enumerate(qlist):
        number = i + 1
        # Label however many choices the entry actually has instead of
        # assuming exactly four.
        choices = [
            (letter, option['answer'], option['correct'])
            for letter, option in zip(letters, entry['answer'])
        ]
        choice_lines = ''.join(f"{letter}. {text}\n" for letter, text, _ in choices)
        question_blocks.append(f"\nQ_{number}: {entry['question']}\n{choice_lines}")

        correct = [(letter, text) for letter, text, is_correct in choices if is_correct]
        correct_letter, correct_text = correct[0]

        # Cycle through the palette so more questions than colors still work.
        color = h_colors[i % len(h_colors)]

        # Highlight only the answer text. Running the replacement over the
        # whole line would also hit the label or the markup whenever the
        # answer happens to be e.g. "A", "p" or a digit.
        highlighted_answer = highlight_text(
            input_text=correct_text, selcted_text=correct_text, color=color
        )
        answer_blocks.append(
            f"\n<p>Answer_{number}: {correct_letter} - {highlighted_answer}</p>\n"
        )

        A_sen = f" The correct answer is {correct_text}."
        selected_extracted_text = highlight_similar_sentence(
            A_sen, selected_extracted_text, color=color
        )

    return (''.join(question_blocks), ''.join(answer_blocks), selected_extracted_text)
def extract_text_from_pdf(pdf_file_path):
    """Read an uploaded PDF and store its text segments in ``extracted_text``.

    The text of all pages is joined with newlines, split into segments by
    ``get_sub_text`` and saved in the module-level ``extracted_text`` global.

    Args:
        pdf_file_path: Path of the PDF, or a file-like object exposing the
            path as ``.name``.

    Returns:
        A status message for the UI.
    """
    global extracted_text
    # NOTE(review): depending on the Gradio version, upload callbacks receive
    # either a path string or a temp-file wrapper; accept both. Confirm
    # against the installed Gradio release.
    if hasattr(pdf_file_path, "read") and hasattr(pdf_file_path, "name"):
        pdf_path = pdf_file_path.name
    else:
        pdf_path = pdf_file_path

    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text() for page in doc]

    extracted_text = get_sub_text('\n'.join(page_texts))
    return ("File is uploaded Successfuly!")
# `qg` is already created right after the imports from the same class.
# Building a second generator only costs start-up time and memory, so reuse
# the existing instance and create one only if it is missing.
if "qg" not in globals():
    qg = qs.QuestionGenerator()
def get_sub_text(TXT):
    """Split *TXT* into segments with the question generator.

    Returns the result of ``qg._split_into_segments`` as-is when it is a
    list, otherwise wrapped in a one-element list.
    """
    segments = qg._split_into_segments(TXT)
    return segments if isinstance(segments, list) else [segments]
def highlight_text(input_text, selcted_text, color='yellow'):
    """Wrap every occurrence of *selcted_text* in a colored ``<span>``.

    Args:
        input_text: Text (or HTML) to search in.
        selcted_text: Exact substring to highlight.
        color: CSS color used as the span's background.

    Returns:
        *input_text* with each occurrence of *selcted_text* wrapped in
        ``<span style="background-color: ...">``; *input_text* unchanged
        when *selcted_text* is empty.
    """
    if not selcted_text:
        # str.replace('', x) would inject the span between every character.
        return input_text
    span = f'<span style="background-color: {color}">{selcted_text}</span>'
    return input_text.replace(selcted_text, span)
def pick_One_txt(sub_texts):
    """Pick one text segment at random and remember it globally.

    The chosen segment is stored in the module-level
    ``selected_extracted_text`` global and also returned.

    Args:
        sub_texts: Non-empty sequence of text segments.

    Returns:
        The randomly selected segment.

    Raises:
        IndexError: If *sub_texts* is empty.
    """
    global selected_extracted_text
    # random.choice always yields a valid index; truncating
    # random.uniform(0, N) can land on N itself.
    selected_extracted_text = random.choice(sub_texts)
    return selected_extracted_text
def pipeline(NoQs):
    """Generate questions for the currently selected text segment.

    Reads the module-level ``selected_extracted_text``, asks ``qg`` for up to
    *NoQs* multiple-choice questions, formats them with ``Extract_QA`` and
    stores the results in the module-level globals ``Q`` and ``A``. ``A``
    is the answers HTML followed by a newline and the highlighted source text.

    Returns:
        The tuple ``(Q, A)``.
    """
    global Q, A
    source_text = selected_extracted_text
    generated = qg.generate(
        source_text,
        num_questions=NoQs,
        answer_style="multiple_choice",
    )
    questions, answers, highlighted_source = Extract_QA(generated, source_text)
    Q = questions
    A = answers + '\n' + highlighted_source
    return (Q, A)
def ReurnAnswer():
    """Return the answers HTML stored in the module-level global ``A``.

    Returns an empty string when ``A`` has not been set yet, e.g. when the
    answers are requested before any questions were generated, instead of
    failing with a NameError.
    """
    return globals().get("A", "")
def GetQuestion(NoQs):
    """Pick a random text segment and return generated questions for it.

    Args:
        NoQs: Maximum number of questions; converted with ``int()``.

    Returns:
        The formatted questions text, or a short hint when no text has been
        extracted yet (no PDF uploaded, or the upload produced no segments).
    """
    NoQs = int(NoQs)
    # `extracted_text` only exists after a successful upload; looking it up
    # defensively avoids a NameError when the button is pressed too early.
    segments = globals().get("extracted_text")
    if not segments:
        return "Please upload a PDF file first."
    pick_One_txt(segments)
    questions, _answers = pipeline(NoQs)
    return questions
# Markdown shown next to the logo.
_INTRO_MARKDOWN = """ 🐶 **PupQuizAI** is an Artificial-Intelligence tool that streamlines the studying process. Simply input a text pdf that you need to study from. Then, PupQuiz will create 1-5 custom questions for you to study from each time you push 'Show Questions'.
"""

# Markdown shown below the controls.
_INSTRUCTIONS_MARKDOWN = """ 🐶
**Instructions:**
* Start by selecting a 'pdf' text file you want to upload by clicking the "Select file" button. (PupQuiz currently only supports files that can have highlightable text)
* Select the number of questions you want generated from the "# of Questions" selector.
* Click "Show Questions"
* Then, if you want answers to the questions, select "Show Answers" """

# Build the UI: controls in the left column, questions/answers on the right.
# NOTE(review): the nesting of rows/columns below was reconstructed from a
# copy of the file that had lost its indentation — confirm the layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                gr.Image("PupQuizAI.png")
                gr.Markdown(_INTRO_MARKDOWN)
            with gr.Row():
                input_file = gr.UploadButton(label='Select a file!', file_types=[".pdf"])
                status = gr.HTML()
                # Extract the text as soon as a file is uploaded and show
                # the returned status message.
                input_file.upload(fn=extract_text_from_pdf, inputs=input_file, outputs=status)
            Gen_Question = gr.Button(value="Show Questions")
            Gen_Answer = gr.Button(value="Show Answers")
            No_Qs = gr.Slider(minimum=1, maximum=5, value=3, step=1, label='Max # of Questions')
            gr.Markdown(_INSTRUCTIONS_MARKDOWN)
        # `scale` is passed as an integer.
        with gr.Column(scale=2):
            question = gr.Textbox(label="Question(s)")
            Answer = gr.HTML(label="Answer(s)")
    # NOTE(review): both handlers are registered under the same api_name;
    # confirm how the installed Gradio version resolves the clash before
    # relying on either endpoint name.
    Gen_Question.click(GetQuestion, inputs=No_Qs, outputs=question, api_name="QuestioGenerator")
    Gen_Answer.click(ReurnAnswer, inputs=None, outputs=Answer, api_name="QuestioGenerator")

demo.launch()