Spaces:
Running
Running
File size: 1,967 Bytes
8c79f36 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import streamlit as st
import os
from utils.demo import load_video, ctc_decode
from utils.two_stream_infer import load_model
import os
from scripts.extract_lip_coordinates import generate_lip_coordinates
import options as opt
st.set_page_config(layout="wide")
model = load_model()
st.title("LipCoordNet Demo")
st.info(
"The inference speed is very slow on Huggingface spaces due to it being processed entirely on CPU. For a quicker inference, please clone the repository and change the “device” under options.py to “cuda” for local inference using GPU",
icon="ℹ️",
)
# Generating a list of options or videos
options = os.listdir(os.path.join("app_input"))
selected_video = st.selectbox("Choose video", options)
col1, col2 = st.columns(2)
with col1:
file_path = os.path.join("app_input", selected_video)
video_name = selected_video.split(".")[0]
os.system(f"ffmpeg -i {file_path} -vcodec libx264 {video_name}.mp4 -y")
# Rendering inside of the app
video = open(f"{video_name}.mp4", "rb")
video_bytes = video.read()
st.video(video_bytes)
with col1, st.spinner("Splitting video into frames"):
video, img_p, files = load_video(f"{video_name}.mp4", opt.device)
prediction_video = video
st.markdown(f"Frames Generated:\n{files}")
frames_generated = True
with col1, st.spinner("Generating Lip Landmark Coordinates"):
coordinates = generate_lip_coordinates(f"{video_name}_samples")
prediction_coordinates = coordinates
st.markdown(f"Coordinates Generated:\n{coordinates}")
coordinates_generated = True
with col2:
st.info("Ready to make prediction!")
generate = st.button("Generate")
if generate:
with col2, st.spinner("Generating..."):
y = model(
prediction_video[None, ...].to(opt.device),
prediction_coordinates[None, ...].to(opt.device),
)
txt = ctc_decode(y[0])
st.text(txt[-1])
|