import gradio as gr
import numpy as np
from PIL import Image
import cv2
import spaces
from inference.seg import process_image_or_video as process_seg
from inference.pose import process_image_or_video as process_pose
from inference.depth import process_image_or_video as process_depth
from inference.normal import process_image_or_video as process_normal
from config import SAPIENS_LITE_MODELS_PATH
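
# update_model_choices repopulates the model-version dropdown when the task
# selection changes; returning a gr.Dropdown from an event handler updates the
# existing component in place (Gradio 4 behavior).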
def update_model_choices(task):
    model_choices = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
    return gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
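

# Inference runs on GPU: the @spaces.GPU decorator requests a ZeroGPU slot on
# Hugging Face Spaces for up to `duration` seconds per call.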
@spaces.GPU(duration=75)
def process_image(input_image, task, version):
    # Gradio may hand over a numpy array; normalize to PIL before inference.
    if isinstance(input_image, np.ndarray):
        input_image = Image.fromarray(input_image)
    if task.lower() == 'seg':
        result = process_seg(input_image, task=task.lower(), version=version)
    elif task.lower() == 'pose':
        result = process_pose(input_image, task=task.lower(), version=version)
    elif task.lower() == 'depth':
        result = process_depth(input_image, task=task.lower(), version=version)
    elif task.lower() == 'normal':
        result = process_normal(input_image, task=task.lower(), version=version)
    else:
        result = None
        print(f"Unsupported task: {task}")
    return result
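

# Video is processed frame by frame: decode with OpenCV (BGR), convert to RGB
# for the model, then convert back and re-encode at the source fps and size.
# Note this assumes each inference helper returns a frame matching the input
# dimensions; cv2.VideoWriter silently drops frames of any other size.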
@spaces.GPU(duration=75)
def process_video(input_video, task, version):
    cap = cv2.VideoCapture(input_video)
    # Match the writer to the source video's geometry and frame rate.
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_video = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if task.lower() == 'seg':
            processed_frame = process_seg(frame_rgb, task=task.lower(), version=version)
        elif task.lower() == 'pose':
            processed_frame = process_pose(frame_rgb, task=task.lower(), version=version)
        elif task.lower() == 'depth':
            processed_frame = process_depth(frame_rgb, task=task.lower(), version=version)
        elif task.lower() == 'normal':
            processed_frame = process_normal(frame_rgb, task=task.lower(), version=version)
        else:
            processed_frame = None
            print(f"Unsupported task: {task}")
            break
        if processed_frame is not None:
            processed_frame_bgr = cv2.cvtColor(np.array(processed_frame), cv2.COLOR_RGB2BGR)
            output_video.write(processed_frame_bgr)
    cap.release()
    output_video.release()
    return 'output_video.mp4'
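

# Gradio UI: an Image tab and a Video tab share the same task selector and
# model-version dropdown; changing the task repopulates the dropdown.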
with gr.Blocks() as demo:
    gr.Markdown("""
    <div style="text-align: center; font-size: 35px; font-weight: bold; margin-bottom: 20px;">
        Sapiens Hugging Face Space🤗
    </div>
    <div style="text-align: center; font-size: 25px; font-weight: bold; margin-bottom: 20px;">
        Foundation for Human Vision Models
    </div>
    <div style="text-align: center;">
        <a href="https://huggingface.co/facebook/sapiens">🤗 Sapiens Models</a> |
        <a href="https://github.com/facebookresearch/sapiens/">🌐 GitHub</a> |
        <a href="https://www.arxiv.org/abs/2408.12569">📜 arXiv</a> |
        <a href="https://joselo.ai">🔗 Personal Blog</a>
    </div>
    <div style="text-align: center; font-size: 15px; font-weight: bold; margin-bottom: 20px;">
        Sapiens is a family of models for four fundamental human-centric vision tasks: 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
    </div>
    """)
    with gr.Tabs():
        with gr.TabItem('Image'):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(label="Input Image", type="pil")
                    select_task_image = gr.Radio(
                        ["seg", "pose", "depth", "normal"],
                        label="Task",
                        info="Choose the task to perform",
                        value="seg"
                    )
                    model_name_image = gr.Dropdown(
                        label="Model Version",
                        choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
                        value="sapiens_0.3b",
                    )
                with gr.Column():
                    result_image = gr.Image(label="Result")
                    run_button_image = gr.Button("Run")
        with gr.TabItem('Video'):
            with gr.Row():
                with gr.Column():
                    input_video = gr.Video(label="Input Video")
                    select_task_video = gr.Radio(
                        ["seg", "pose", "depth", "normal"],
                        label="Task",
                        info="Choose the task to perform",
                        value="seg"
                    )
                    model_name_video = gr.Dropdown(
                        label="Model Version",
                        choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
                        value="sapiens_0.3b",
                    )
                with gr.Column():
                    result_video = gr.Video(label="Result")
                    run_button_video = gr.Button("Run")

    # Keep the model-version dropdown in sync with the selected task.
    select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image)
    select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video)
    run_button_image.click(
        fn=process_image,
        inputs=[input_image, select_task_image, model_name_image],
        outputs=[result_image],
    )
    run_button_video.click(
        fn=process_video,
        inputs=[input_video, select_task_video, model_name_video],
        outputs=[result_video],
    )

if __name__ == "__main__":
    demo.launch(share=False)