# sapiens-demo / app.py
import gradio as gr
import numpy as np
from PIL import Image
import cv2
import spaces
from inference.seg import process_image_or_video as process_seg
from inference.pose import process_image_or_video as process_pose
from inference.depth import process_image_or_video as process_depth
from inference.normal import process_image_or_video as process_normal
from config import SAPIENS_LITE_MODELS_PATH
def update_model_choices(task):
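    """Repopulate the model dropdown with the checkpoints available for the selected task."""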
model_choices = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
return gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
@spaces.GPU(duration=75)
def process_image(input_image, task, version):
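    """Run the selected Sapiens task on a single image and return the visualized result."""
    # Gradio may pass the image as a NumPy array; normalize to PIL for the pipelines.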
if isinstance(input_image, np.ndarray):
input_image = Image.fromarray(input_image)
if task.lower() == 'seg':
result = process_seg(input_image, task=task.lower(), version=version)
elif task.lower() == 'pose':
result = process_pose(input_image, task=task.lower(), version=version)
elif task.lower() == 'depth':
result = process_depth(input_image, task=task.lower(), version=version)
elif task.lower() == 'normal':
result = process_normal(input_image, task=task.lower(), version=version)
else:
result = None
print(f"Tarea no soportada: {task}")
return result
@spaces.GPU(duration=75)
def process_video(input_video, task, version):
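    """Run the selected Sapiens task frame by frame and write the result to a new video."""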
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
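    # Write the output with the mp4v codec at the source resolution and frame rate.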
output_video = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
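        # OpenCV decodes frames as BGR; the inference pipelines expect RGB.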
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if task.lower() == 'seg':
processed_frame = process_seg(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'pose':
processed_frame = process_pose(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'depth':
processed_frame = process_depth(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'normal':
processed_frame = process_normal(frame_rgb, task=task.lower(), version=version)
        else:
            # Unsupported task: release resources and return nothing instead of an empty video.
            print(f"Unsupported task: {task}")
            cap.release()
            output_video.release()
            return None
        if processed_frame is not None:
            # Convert back to BGR for OpenCV; resize if the model changed the resolution.
            processed_frame_bgr = cv2.cvtColor(np.array(processed_frame), cv2.COLOR_RGB2BGR)
            if processed_frame_bgr.shape[:2] != (height, width):
                processed_frame_bgr = cv2.resize(processed_frame_bgr, (width, height))
            output_video.write(processed_frame_bgr)
cap.release()
output_video.release()
return 'output_video.mp4'
with gr.Blocks() as demo:
gr.Markdown("""
<div style="text-align: center; font-size: 35px; font-weight: bold; margin-bottom: 20px;">
        Sapiens Hugging Face Space 🤗
</div>
<div style="text-align: center; font-size: 25px; font-weight: bold; margin-bottom: 20px;">
Foundation for Human Vision Models
</div>
<div style="text-align: center;">
<a href="https://huggingface.co/facebook/sapiens">🤗 Sapiens Models</a> |
<a href="https://github.com/facebookresearch/sapiens/">🌐 Github</a> |
<a href="https://www.arxiv.org/abs/2408.12569">📜 arxiv </a> |
<a href="https://joselo.ai">🔗Personal Blog </a>
</div>
<div style="text-align: center; font-size: 15px; font-weight: bold; margin-bottom: 20px;">
        Sapiens is a family of models for four fundamental human-centric vision tasks: 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
</div>
""")
with gr.Tabs():
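        # Image tab: upload a still image, pick a task and checkpoint, then run inference.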
with gr.TabItem('Image'):
with gr.Row():
with gr.Column():
input_image = gr.Image(label="Input Image", type="pil")
select_task_image = gr.Radio(
["seg", "pose", "depth", "normal"],
label="Task",
info="Choose the task to perform",
value="seg"
)
model_name_image = gr.Dropdown(
label="Model Version",
choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
value="sapiens_0.3b",
)
with gr.Column():
result_image = gr.Image(label="Result")
run_button_image = gr.Button("Run")
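        # Video tab: same controls as the image tab, applied frame by frame.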
with gr.TabItem('Video'):
with gr.Row():
with gr.Column():
input_video = gr.Video(label="Input Video")
select_task_video = gr.Radio(
["seg", "pose", "depth", "normal"],
label="Task",
info="Choose the task to perform",
value="seg"
)
model_name_video = gr.Dropdown(
label="Model Version",
choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
value="sapiens_0.3b",
)
with gr.Column():
result_video = gr.Video(label="Result")
run_button_video = gr.Button("Run")
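    # Repopulate each model dropdown whenever the task selection changes.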
select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image)
select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video)
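    # Wire each Run button to its processing function.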
run_button_image.click(
fn=process_image,
inputs=[input_image, select_task_image, model_name_image],
outputs=[result_image],
)
run_button_video.click(
fn=process_video,
inputs=[input_video, select_task_video, model_name_video],
outputs=[result_video],
)
if __name__ == "__main__":
demo.launch(share=False)