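# NOTE: the next line is web-page residue captured with this file (Hugging Face
# Spaces status banner); it is not part of the program.
# Spaces: Build error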
import os | |
import gradio as gr | |
import numpy as np | |
from PIL import Image | |
import cv2 | |
import spaces | |
from inference.seg import process_image_or_video as process_seg | |
from inference.pose import process_image_or_video as process_pose | |
from inference.depth import process_image_or_video as process_depth | |
from inference.normal import process_image_or_video as process_normal | |
from config import SAPIENS_LITE_MODELS_PATH | |
def update_model_choices(task):
    """Build a model-version dropdown matching the selected task.

    Args:
        task: Task name; lower-cased before it is used as a key into
            ``SAPIENS_LITE_MODELS_PATH``.

    Returns:
        A ``gr.Dropdown`` whose choices are the keys registered for the task
        and whose value is the first of them (``None`` when there are none).
    """
    versions = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
    if versions:
        default_version = versions[0]
    else:
        default_version = None
    return gr.Dropdown(choices=versions, value=default_version)
def process_image(input_image, task, version):
    """Run one Sapiens task on a single image.

    Args:
        input_image: Image to process. A ``numpy.ndarray`` is converted to a
            PIL image first; ``None`` (nothing uploaded) short-circuits.
        task: Task name; compared case-insensitively against ``seg``,
            ``pose``, ``depth`` and ``normal``.
        version: Model version identifier forwarded to the task processor.

    Returns:
        Whatever the selected processor returns, or ``None`` when there is no
        input image or the task is not supported.
    """
    # The image widget hands over None when the user presses "Run" without
    # uploading anything; do not forward that to the model code.
    if input_image is None:
        return None

    if isinstance(input_image, np.ndarray):
        input_image = Image.fromarray(input_image)

    task_key = task.lower()
    processors = {
        'seg': process_seg,
        'pose': process_pose,
        'depth': process_depth,
        'normal': process_normal,
    }
    processor = processors.get(task_key)
    if processor is None:
        print(f"Tarea no soportada: {task}")
        return None

    return processor(input_image, task=task_key, version=version)
def process_video(input_video, task, version):
    """Run one Sapiens task on every frame of a video and save the result.

    Each frame is read with OpenCV, converted from BGR to RGB, handed to the
    task processor, converted back to BGR and appended to
    ``output_video.mp4`` (``mp4v`` codec, FPS and frame size taken from the
    input capture).

    Args:
        input_video: Source passed to ``cv2.VideoCapture``; ``None`` (nothing
            uploaded) short-circuits.
        task: Task name; compared case-insensitively against ``seg``,
            ``pose``, ``depth`` and ``normal``.
        version: Model version identifier forwarded to the task processor.

    Returns:
        The string ``'output_video.mp4'``, or ``None`` when there is no input
        video or the task is not supported.
    """
    if input_video is None:
        return None

    # Resolve the processor once instead of re-dispatching on every frame, and
    # reject an unsupported task before any file or capture is opened.
    task_key = task.lower()
    processors = {
        'seg': process_seg,
        'pose': process_pose,
        'depth': process_depth,
        'normal': process_normal,
    }
    processor = processors.get(task_key)
    if processor is None:
        print(f"Tarea no soportada: {task}")
        return None

    # NOTE: the output file name is fixed, so each call overwrites the
    # previous result.
    output_path = 'output_video.mp4'

    cap = cv2.VideoCapture(input_video)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        output_video = cv2.VideoWriter(
            output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
        )
        try:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                processed_frame = processor(frame_rgb, task=task_key, version=version)
                if processed_frame is None:
                    continue

                # NOTE(review): assumes the processor returns an RGB image of
                # the same size as the input frame - confirm.
                processed_frame_bgr = cv2.cvtColor(
                    np.array(processed_frame), cv2.COLOR_RGB2BGR
                )
                output_video.write(processed_frame_bgr)
        finally:
            # Release the writer even if a processor raises mid-video.
            output_video.release()
    finally:
        cap.release()

    return output_path
# HTML banner rendered at the top of the page (title, links, short blurb).
_HEADER_HTML = """
<div style="text-align: center; font-size: 35px; font-weight: bold; margin-bottom: 20px;">
Sapiens Huggingface Space🤗
</div>
<div style="text-align: center; font-size: 25px; font-weight: bold; margin-bottom: 20px;">
Foundation for Human Vision Models
</div>
<div style="text-align: center;">
<a href="https://huggingface.co/facebook/sapiens">🤗 Sapiens Models</a> |
<a href="https://github.com/facebookresearch/sapiens/">🌐 Github</a> |
<a href="https://www.arxiv.org/abs/2408.12569">📜 arxiv </a> |
<a href="https://joselo.ai">🔗Personal Blog </a>
</div>
<div style="text-align: center; font-size: 15px; font-weight: bold; margin-bottom: 20px;">
Sapiens, a family of models for four fundamental human-centric vision tasks - 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
</div>
"""


def _task_controls():
    """Create the task selector and model-version dropdown.

    Must be called inside an active Gradio layout context; the components are
    created in the order radio, then dropdown.

    Returns:
        A ``(task_radio, version_dropdown)`` tuple.
    """
    task_radio = gr.Radio(
        ["seg", "pose", "depth", "normal"],
        label="Task",
        info="Choose the task to perform",
        value="seg",
    )
    version_dropdown = gr.Dropdown(
        label="Model Version",
        choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
        value="sapiens_0.3b",
    )
    return task_radio, version_dropdown


# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting below (in particular the "Run" buttons sitting in the result
# column) is reconstructed - confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(_HEADER_HTML)

    with gr.Tabs():
        # --- Image tab: inputs on the left, result and trigger on the right.
        with gr.TabItem('Image'):
            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(label="Input Image", type="pil")
                    select_task_image, model_name_image = _task_controls()
                with gr.Column():
                    result_image = gr.Image(label="Result")
                    run_button_image = gr.Button("Run")

        # --- Video tab: same arrangement as the image tab.
        with gr.TabItem('Video'):
            with gr.Row():
                with gr.Column():
                    input_video = gr.Video(label="Input Video")
                    select_task_video, model_name_video = _task_controls()
                with gr.Column():
                    result_video = gr.Video(label="Result")
                    run_button_video = gr.Button("Run")

    # Changing the task refreshes the list of model versions in the same tab.
    select_task_image.change(
        fn=update_model_choices,
        inputs=select_task_image,
        outputs=model_name_image,
    )
    select_task_video.change(
        fn=update_model_choices,
        inputs=select_task_video,
        outputs=model_name_video,
    )

    # "Run" buttons feed the tab's inputs to the matching processing function.
    run_button_image.click(
        fn=process_image,
        inputs=[input_image, select_task_image, model_name_image],
        outputs=[result_image],
    )
    run_button_video.click(
        fn=process_video,
        inputs=[input_video, select_task_video, model_name_video],
        outputs=[result_video],
    )
def _main():
    """Launch the Gradio demo with ``share=False``."""
    demo.launch(share=False)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    _main()