fffiloni committed (verified)
Commit 33c5278 · 1 Parent(s): 50964d7

add credentials + heatmaps outputs

Files changed (1): app.py (+36 -7)
app.py CHANGED
@@ -17,7 +17,7 @@ model, transform = torch.hub.load("fkryan/gazelle", "gazelle_dinov2_vitl14_inout
 model.eval()
 model.to(device)
 
-def main(image_input):
+def main(image_input, progress=gr.Progress(track_tqdm=True)):
     # load image
     image = Image.open(image_input)
     width, height = image.size
@@ -73,6 +73,10 @@ def main(image_input):
         draw.text((text_x, text_y), text, fill="lime", font=ImageFont.load_default(size=int(min(width, height) * 0.05)))
         return overlay_image
 
+    heatmap_results = []
+    for i in range(len(bboxes)):
+        overlay_img = visualize_heatmap(image, output['heatmap'][0][i], norm_bboxes[0][i], inout_score=output['inout'][0][i] if output['inout'] is not None else None)
+        heatmap_results.append(overlay_img)
 
     # combined visualization with maximal gaze points for each person
 
@@ -113,21 +117,46 @@ def main(image_input):
 
     result_gazed = visualize_all(image, output['heatmap'][0], norm_bboxes[0], output['inout'][0] if output['inout'] is not None else None, inout_thresh=0.5)
 
-    return result_gazed
-
-
-with gr.Blocks() as demo:
-    with gr.Column():
+    return result_gazed, heatmap_results
+
+css="""
+div#col-container{
+    margin: 0 auto;
+    max-width: 982px;
+}
+"""
+
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.Markdown("# Gaze-LLE: Gaze Target Estimation via Large-Scale Learned Encoders")
+        gr.Markdown("A transformer approach for estimating gaze targets that leverages the power of pretrained visual foundation models. Gaze-LLE provides a streamlined gaze architecture that learns only a lightweight gaze decoder on top of a frozen, pretrained visual encoder (DINOv2). Gaze-LLE learns 1-2 orders of magnitude fewer parameters than prior works and doesn't require any extra input modalities like depth and pose!")
+        gr.HTML("""
+        <div style="display:flex;column-gap:4px;">
+            <a href="https://github.com/fkryan/gazelle">
+                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
+            </a>
+            <a href="https://arxiv.org/abs/2412.09586">
+                <img src='https://img.shields.io/badge/ArXiv-Paper-red'>
+            </a>
+            <a href="https://huggingface.co/spaces/fffiloni/Gaze-LLE?duplicate=true">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-sm.svg" alt="Duplicate this Space">
+            </a>
+            <a href="https://huggingface.co/fffiloni">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-sm-dark.svg" alt="Follow me on HF">
+            </a>
+        </div>
+        """)
         with gr.Row():
             with gr.Column():
                 input_image = gr.Image(label="Image Input", type="filepath")
                 submit_button = gr.Button("Submit")
             with gr.Column():
                 result = gr.Image(label="Result")
+                heatmaps = gr.Gallery(label="Heatmap")
 
         submit_button.click(
            fn = main,
            inputs = [input_image],
-           outputs = [result]
+           outputs = [result, heatmaps]
        )
 demo.queue().launch(show_api=False, show_error=True)
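
The per-person loop added in the second hunk calls a visualize_heatmap helper that is defined elsewhere in app.py and is not shown in this diff. As a rough sketch only, the snippet below illustrates what such a per-person overlay function might do; the name sketch_visualize_heatmap, its signature, and the rendering choices are assumptions for illustration, not the Space's actual implementation.

# Hypothetical illustration only: a per-person gaze-heatmap overlay in the spirit of
# the visualize_heatmap(...) call added above. Names, signature, and rendering
# details are assumptions; the real helper lives elsewhere in app.py.
import torch
import torch.nn.functional as F
from PIL import Image, ImageDraw

def sketch_visualize_heatmap(image, heatmap, bbox=None, inout_score=None, alpha=0.6):
    # upsample the model's low-resolution heatmap to the input image size
    heatmap = heatmap.detach().float().cpu()
    heatmap = F.interpolate(heatmap[None, None], size=(image.height, image.width),
                            mode="bilinear", align_corners=False)[0, 0]
    # normalize to [0, 1] and use it as the alpha channel of a red overlay
    heatmap = (heatmap - heatmap.min()) / (heatmap.max() - heatmap.min() + 1e-8)
    overlay = Image.new("RGBA", image.size, (255, 0, 0, 0))
    overlay.putalpha(Image.fromarray((heatmap.numpy() * 255 * alpha).astype("uint8")))
    out = Image.alpha_composite(image.convert("RGBA"), overlay)

    if bbox is not None:
        # bbox is assumed normalized (xmin, ymin, xmax, ymax); draw the head box
        xmin, ymin, xmax, ymax = bbox
        draw = ImageDraw.Draw(out)
        draw.rectangle([xmin * image.width, ymin * image.height,
                        xmax * image.width, ymax * image.height],
                       outline="lime", width=3)
        if inout_score is not None:
            # annotate the in-frame probability next to the box
            draw.text((xmin * image.width, ymax * image.height),
                      f"in-frame: {float(inout_score):.2f}", fill="lime")
    return out.convert("RGB")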
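
On the UI side, main now returns a tuple that the click handler maps positionally onto two output components; gr.Gallery accepts a list of PIL images, and the new progress=gr.Progress(track_tqdm=True) argument is injected by Gradio itself so tqdm loops inside main surface as a progress bar. Below is a minimal, self-contained sketch of that wiring under placeholder names (demo_fn, inp, btn, out, gallery), not the Space's code.

# Minimal sketch of the output wiring this commit introduces: one gr.Image plus a
# gr.Gallery fed from a (image, list_of_images) return value. Placeholder names only.
import gradio as gr
from PIL import Image

def demo_fn(image_path, progress=gr.Progress(track_tqdm=True)):
    img = Image.open(image_path)
    combined = img.copy()                  # stands in for result_gazed
    per_person = [img.copy(), img.copy()]  # stands in for heatmap_results
    return combined, per_person

with gr.Blocks() as demo:
    inp = gr.Image(label="Image Input", type="filepath")
    btn = gr.Button("Submit")
    out = gr.Image(label="Result")
    gallery = gr.Gallery(label="Heatmap")
    # the two return values map positionally onto the two output components
    btn.click(fn=demo_fn, inputs=[inp], outputs=[out, gallery])

demo.queue().launch()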