import gradio as gr
import random
import os
import json
from collections import defaultdict
from pathlib import Path
import pandas as pd
import zipfile

# Constants
BASE_PATH = "./data"
STUDENT_DATA_PATH = "/data/student_data_tmp"
RESPONSE_FILE = "/data/responses_tmp"
IMG_PATH = "images"
DATA_MAP = "./data.csv"
TOTAL_ITEMS = 20
# STUDENT_DATA_LOCKFILE = "./data/sd"
# RESPONSE_FILE_LOCKFILE = "./data/rf"

# In-memory registry of active Student sessions, keyed by student ID.
student_ = {}


# Lock Handling
def acquire_lock(lockfile):
    while os.path.exists(lockfile):
        pass  # Busy-wait until the lock file is released
    open(lockfile, "w").close()  # Create an empty lock file


def release_lock(lockfile):
    if os.path.exists(lockfile):
        os.remove(lockfile)  # Delete the lock file


def return_dict():
    return {}
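
# NOTE: the busy-wait lock above (currently unused; all call sites are commented
# out) is not race-free. If cross-process locking is ever needed, one option --
# assuming the third-party `filelock` package is installed -- would be a sketch
# along these lines:
#
#   from filelock import FileLock
#
#   with FileLock(f"{STUDENT_DATA_PATH}_{student_id}.json.lock"):
#       ...  # read/modify/write the JSON file while holding the lock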

# Class Definition
class Student:
    def __init__(self, student_id=None):
        self.id = student_id
        self.count = 0
        self.responses = defaultdict(return_dict)
        self.finished_img = set()
        self.data = {}
        # Read the CSV file into a DataFrame
        self.df = pd.read_csv(DATA_MAP, header=None, names=["ImageFile", "Caption"])
        self.indices = list(range(len(self.df)))

        # Restore any previously saved progress for this student.
        # acquire_lock(f"{STUDENT_DATA_LOCKFILE}_{self.id}.lock")
        try:
            if not os.path.exists(f"{STUDENT_DATA_PATH}_{self.id}.json"):
                open(f"{STUDENT_DATA_PATH}_{self.id}.json", "w").close()
            with open(f"{STUDENT_DATA_PATH}_{self.id}.json", "r") as f:
                try:
                    self.data = json.load(f)
                    per_student = self.data.get(student_id, {})
                    self.count = per_student.get("count", 0)
                    # JSON stores the (image, caption) pairs as lists; convert
                    # them back to hashable tuples before rebuilding the set.
                    self.finished_img = set(
                        tuple(pair) for pair in per_student.get("finished_img", [])
                    )
                except Exception:
                    pass  # Empty or corrupt file: start from scratch
        finally:
            # release_lock(f"{STUDENT_DATA_LOCKFILE}_{self.id}.lock")
            pass

        # Make sure the per-student response file exists.
        # acquire_lock(f"{RESPONSE_FILE_LOCKFILE}_{self.id}.lock")
        try:
            if not os.path.exists(f"{RESPONSE_FILE}_{self.id}.json"):
                open(f"{RESPONSE_FILE}_{self.id}.json", "w").close()
        finally:
            # release_lock(f"{RESPONSE_FILE_LOCKFILE}_{self.id}.lock")
            pass

    def save(self):
        # Persist progress (count and finished image/caption pairs).
        # acquire_lock(f"{STUDENT_DATA_LOCKFILE}_{self.id}.lock")
        try:
            self.data[self.id] = {
                "count": self.count,
                "finished_img": list(self.finished_img),
            }
            with open(f"{STUDENT_DATA_PATH}_{self.id}.json", "w") as f:
                json.dump(self.data, f)
        finally:
            # release_lock(f"{STUDENT_DATA_LOCKFILE}_{self.id}.lock")
            pass

        # Merge this session's responses into the per-student response file.
        # acquire_lock(f"{RESPONSE_FILE_LOCKFILE}_{self.id}.lock")
        try:
            d = {}
            with open(f"{RESPONSE_FILE}_{self.id}.json", "r") as f:
                try:
                    d = json.load(f)
                except Exception:
                    pass  # Empty or corrupt file: start with fresh counts
            for item_key, votes in self.responses.items():
                for option in votes:
                    if item_key not in d:
                        d[item_key] = {
                            "Yes": 0,
                            "Yes, but hallucinated": 0,
                            "No": 0,
                            "Ambiguous": 0,
                        }
                    d[item_key][option] += 1
            with open(f"{RESPONSE_FILE}_{self.id}.json", "w") as f:
                json.dump(d, f)
        finally:
            # release_lock(f"{RESPONSE_FILE_LOCKFILE}_{self.id}.lock")
            pass

    def rand_img_cap(self):
        # Pick a random (image, caption) pair this student has not seen yet.
        # Assumes DATA_MAP contains more unseen pairs than TOTAL_ITEMS;
        # otherwise this loop would never terminate.
        while True:
            idx = random.choice(self.indices)
            current_img_id = self.df.iloc[idx]["ImageFile"]
            caption = self.df.iloc[idx]["Caption"]
            if (current_img_id, caption) not in self.finished_img:
                break
        image_path = f"{BASE_PATH}/{current_img_id}"
        self.finished_img.add((current_img_id, caption))
        self.did = f"{current_img_id} : {caption}"  # Key used in the response file
        return image_path, caption
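
# Expected layout of DATA_MAP (an assumption inferred from the read_csv call in
# Student.__init__): a headerless, two-column CSV mapping image file names
# (relative to BASE_PATH) to their model-generated captions, e.g.
#
#   img_0001.jpg,The dog is to the left of the bicycle
#   img_0002.jpg,The lamp is behind the sofa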

def download_file1():
    # Bundle every JSON file under /data/ into a single ZIP for download.
    path = Path("/data/")
    files = [str(f) for f in path.glob("*.json")]
    zip_path = "/data/download_files.zip"
    # Create a ZIP file
    with zipfile.ZipFile(zip_path, "w") as zipf:
        for file in files:
            # Add the file to the ZIP under its base name
            zipf.write(file, os.path.basename(file))
    # Return the ZIP file path for Gradio to handle the download
    return gr.update(value=zip_path, visible=True)


def download_file2():
    # Path to the student-data file to be downloaded
    file_path = "/data/student_data_new.json"
    return gr.update(visible=True, value=file_path)


# Gradio Functions
def start(student_id):
    # Create (or restore) the session for this student and reveal the
    # instructions/example page.
    student = Student(student_id)
    student_[str(student_id)] = student
    return (
        gr.update(value=student_id, visible=False),     # gid: stash the ID for later callbacks
        *[gr.update(visible=True) for _ in range(16)],  # instructions, example page, show_btn
        gr.update(visible=False),                       # start_btn
    )


def show(student_id):
    # Hide the example page and show either the task widgets or, if this
    # student has already finished, the completion message.
    student = student_[student_id]
    is_admin = student_id == "ADMIN"
    if student.count == TOTAL_ITEMS:
        return (
            *[gr.update(visible=False) for _ in range(18)],  # instructions, example page, show_btn, counter, save_btn
            gr.update(visible=True),                         # finish
            *[gr.update(visible=False) for _ in range(7)],   # img, q, start_btn, answer buttons
            gr.update(visible=is_admin),                     # download_btn1
            gr.update(visible=is_admin),                     # download_btn2
        )
    selected_image, prompt = student.rand_img_cap()
    return (
        *[gr.update(visible=False) for _ in range(16)],                   # instructions, example page, show_btn
        gr.update(value=f"{student.count}/{TOTAL_ITEMS}", visible=True),  # counter
        gr.update(visible=True),                                          # save_btn
        gr.update(visible=False),                                         # finish
        gr.update(value=selected_image, visible=True),                    # img
        gr.update(visible=True, value=f"{prompt}"),                       # q
        gr.update(visible=False),                                         # start_btn
        gr.update(visible=True),                                          # correct_btn
        gr.update(visible=True),                                          # challucinate_btn
        gr.update(visible=True),                                          # wrong_btn
        gr.update(visible=True),                                          # neutral_btn
        gr.update(visible=is_admin),                                      # download_btn1
        gr.update(visible=is_admin),                                      # download_btn2
    )


def new(student):
    # Advance to the next item; once the quota is reached, save and finish.
    student.count += 1
    if student.count == TOTAL_ITEMS:
        student.save()
        return (
            gr.update(value=f"{student.count}/{TOTAL_ITEMS}", visible=True),  # counter
            gr.update(visible=True),   # finish
            gr.update(visible=False),  # img
            gr.update(visible=False),  # q
            gr.update(visible=False),  # correct_btn
            gr.update(visible=False),  # challucinate_btn
            gr.update(visible=False),  # wrong_btn
            gr.update(visible=False),  # neutral_btn
        )
    new_img, prompt = student.rand_img_cap()
    return (
        gr.update(value=f"{student.count}/{TOTAL_ITEMS}", visible=True),  # counter
        gr.update(visible=False),                    # finish
        gr.update(value=new_img, visible=True),      # img
        gr.update(visible=True, value=f"{prompt}"),  # q
        gr.update(visible=True),                     # correct_btn
        gr.update(visible=True),                     # challucinate_btn
        gr.update(visible=True),                     # wrong_btn
        gr.update(visible=True),                     # neutral_btn
    )


def correct(student_id):
    student = student_[student_id]
    student.responses[student.did]["Yes"] = 1
    return new(student=student)


def challucinate(student_id):
    student = student_[student_id]
    student.responses[student.did]["Yes, but hallucinated"] = 1
    return new(student=student)


def wrong(student_id):
    student = student_[student_id]
    student.responses[student.did]["No"] = 1
    return new(student=student)


def neutral(student_id):
    student = student_[student_id]
    student.responses[student.did]["Ambiguous"] = 1
    return new(student=student)


def save(student_id):
    student = student_[student_id]
    student.save()
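
# For reference, the per-student response file written by Student.save() maps
# "<image file> : <caption>" keys to vote counts. An illustrative (made-up) entry:
#
#   {
#     "img_0001.jpg : The dog is to the left of the bicycle":
#       {"Yes": 1, "Yes, but hallucinated": 0, "No": 0, "Ambiguous": 0}
#   }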

def example_item(image_path, caption):
    # Build one (initially hidden) example image together with its caption.
    with gr.Column():
        image = gr.Image(
            value=image_path,
            visible=False,
            interactive=False,
            show_download_button=False,
            show_share_button=False,
            height=512,
            width=512,
        )
        text = gr.Textbox(show_label=False, value=caption, visible=False)
    return image, text


# Gradio Interface
with gr.Blocks() as iface:
    student_id_input = gr.Textbox(label="ASU ID")
    with gr.Row():
        gid = gr.Textbox(label="", visible=False)
        counter = gr.Textbox(value=f"0/{TOTAL_ITEMS}", show_label=False, visible=False)
        save_btn = gr.Button("Save", size="sm")

    # Example page
    with gr.Column():
        instructions = gr.TextArea(
            label="Overview and Instructions",
            value="""While Text-to-Image (T2I) generative models now excel at creating photo-realistic images, they still struggle to maintain spatial consistency. That is, they are not able to comprehend the difference between left/right, above/below, or front/behind. One of the fundamental reasons for this is that the text data they are trained on does not contain "enough" of these spatial phrases. Specifically, datasets like COCO and LAION contain only around 5% and 3% of such phrases, respectively. This is relatively low compared to how often we humans use them. Thus, as a first step towards addressing this shortcoming of T2I models, we are creating "better" spatial data. How are we doing that? We are using another model to re-caption images with a "spatial" focus, and we need your help in validating these new, model-generated captions.

In this task, you will be given an image and a caption corresponding to it. You will have to mark the caption as correct, incorrect, or ambiguous based on your judgement. Some instructions for you:

1. Most of the captions will contain spatial phrases, so please pay special attention to their correctness.
2. The captions will not fully describe the image; that is okay. We only want to make sure that whatever the caption does describe is fully correct.
3. If multiple relationships are mentioned and any one of them is incorrect, please choose 'No'.
4. In some cases, the captions may mention objects that you do not see in the image. This phenomenon is known as "hallucination". In this case, we'd like you to choose 'Yes, but hallucinated' (if everything else is correct).
5. Please use 'Ambiguous' sparingly; only in cases where you are not able to clearly distinguish between the spatial relationships.""",
            visible=False,
        )
        with gr.Row():
            with gr.Column():
                cexam = gr.Textbox(
                    show_label=False, value="Correct examples", visible=False
                )
                ei1, et1 = example_item(
                    "./assets/c1.jpeg", "The horse is on the right of the stop-sign"
                )
                ei2, et2 = example_item(
                    "./assets/c2.jpeg", "The clock is behind the girl"
                )
                ei3, et3 = example_item(
                    "./assets/c3.jpeg", "The microwave is above the oven"
                )
            with gr.Column():
                wexam = gr.Textbox(
                    show_label=False, value="Wrong examples", visible=False
                )
                ei4, et4 = example_item(
                    "./assets/w1.jpeg", "The aeroplane is below the person"
                )
                ei5, et5 = example_item(
                    "./assets/w2.jpeg", "The pizza is to the right of the strawberries"
                )
                ei6, et6 = example_item(
                    "./assets/w3.jpeg", "The cat is behind the bowl of fruits"
                )
        show_btn = gr.Button("Continue", visible=False)

    img = gr.Image(
        value=None,
        interactive=False,
        show_download_button=False,
        show_share_button=False,
        height=512,
        width=512,
    )
    error = gr.Textbox(
        value="Attempt all questions", interactive=False, visible=False, label=""
    )
    q = gr.Textbox(label="Choose the correct option for the caption")
    finish = gr.Textbox(
        value="Task finished", interactive=False, visible=False, label=""
    )
    start_btn = gr.Button("Start")
    with gr.Row():
        correct_btn = gr.Button("Yes")
        challucinate_btn = gr.Button("Yes, but hallucinated")
        wrong_btn = gr.Button("No")
        neutral_btn = gr.Button("Ambiguous")
    download_btn1 = gr.Button("Download response data")
    f1 = gr.File(label="Response file", visible=False, file_count="single")
    download_btn2 = gr.Button("Download student data")
    f2 = gr.File(label="Student data file", visible=False)

    # Set initial visibility
    student_id_input.visible = True
    img.visible = False
    q.visible = False
    save_btn.visible = False
    correct_btn.visible = False
    challucinate_btn.visible = False
    wrong_btn.visible = False
    neutral_btn.visible = False
    download_btn1.visible = False
    download_btn2.visible = False
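
    # The `outputs` lists below are matched positionally against the tuple of
    # gr.update(...) values returned by each callback, so their order must stay
    # in sync with the corresponding return statements in start/show/new above.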

    start_btn.click(
        start,
        inputs=student_id_input,
        outputs=[
            gid,
            instructions,
            cexam,
            wexam,
            ei1,
            et1,
            ei2,
            et2,
            ei3,
            et3,
            ei4,
            et4,
            ei5,
            et5,
            ei6,
            et6,
            show_btn,
            start_btn,
        ],
    )
    show_btn.click(
        show,
        inputs=gid,
        outputs=[
            instructions,
            cexam,
            wexam,
            ei1,
            et1,
            ei2,
            et2,
            ei3,
            et3,
            ei4,
            et4,
            ei5,
            et5,
            ei6,
            et6,
            show_btn,
            counter,
            save_btn,
            finish,
            img,
            q,
            start_btn,
            correct_btn,
            challucinate_btn,
            wrong_btn,
            neutral_btn,
            download_btn1,
            download_btn2,
        ],
    )
    # The four answer buttons all update the same set of components.
    answer_outputs = [
        counter,
        finish,
        img,
        q,
        correct_btn,
        challucinate_btn,
        wrong_btn,
        neutral_btn,
    ]
    correct_btn.click(correct, inputs=gid, outputs=answer_outputs)
    challucinate_btn.click(challucinate, inputs=gid, outputs=answer_outputs)
    wrong_btn.click(wrong, inputs=gid, outputs=answer_outputs)
    neutral_btn.click(neutral, inputs=gid, outputs=answer_outputs)
    save_btn.click(save, inputs=[gid], outputs=None)
    download_btn1.click(download_file1, inputs=None, outputs=f1)
    download_btn2.click(download_file2, inputs=None, outputs=f2)

# Launch Interface
iface.launch()