# Podfusion / animate.py
import os
import sys
from pathlib import Path
import cv2
import mediapy
import numpy as np
from frame_interpolation.eval import interpolator, util
from huggingface_hub import snapshot_download
from image_tools.sizes import resize_and_crop
from moviepy.editor import CompositeVideoClip, ImageClip
from moviepy.editor import VideoFileClip as vfc
from PIL import Image
# Get key positions at which a frame needs to be generated
def list_of_positions(num_contours, num_frames=100):
    positions = []
    for i in range(num_frames):
        positions.append(int(num_contours / num_frames * i))
    return positions
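
# For example, list_of_positions(1000) spreads 100 snapshot indices evenly over
# the contours: [0, 10, 20, ..., 990].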
def contourfinder(image1, image2, text=None, num_frames=100, output_dir=Path("temp")):
    # Create two blank canvases to draw contours into, sized to match the inputs
    blank = np.zeros(np.shape(image1), dtype="uint8")
    blank2 = np.zeros(np.shape(image2), dtype="uint8")
    # Edge maps (via Canny) and contours for images 1 and 2
    threshold = cv2.Canny(image=image1, threshold1=100, threshold2=200)
    contours, hierarchies = cv2.findContours(
        threshold, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    threshold2 = cv2.Canny(image=image2, threshold1=100, threshold2=200)
    contours2, hierarchies2 = cv2.findContours(
        threshold2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    # Initialize three empty videos; VideoWriter expects (width, height),
    # which is the reverse of numpy's shape order
    frame_size = (threshold.shape[1], threshold.shape[0])
    vid1 = cv2.VideoWriter(
        Path(output_dir / "vid1.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    vid2 = cv2.VideoWriter(
        Path(output_dir / "vid2.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        24,
        frame_size,
    )
    text_vid = cv2.VideoWriter(
        Path(output_dir / "text_video.mp4").as_posix(),
        cv2.VideoWriter_fourcc(*"mp4v"),
        10,
        frame_size,
    )
    # Get the positions at which to save snapshot frames
    positions = list_of_positions(len(contours))
    frames = []
    # Loop over contours, adding each to the blank image, then writing to the video
    for i in range(len(contours)):
        cv2.drawContours(
            blank, contours=contours, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            frames.append(blank.copy())  # copy, so later draws don't alter saved frames
        # Compile to video
        vid1.write(blank)
    vid1.release()
    clip1 = vfc(Path(output_dir / "vid1.mp4").as_posix())
    positions = list_of_positions(len(contours2))
    for i in range(len(contours2)):
        cv2.drawContours(
            blank2, contours=contours2, contourIdx=i, color=(125, 200, 255), thickness=1
        )
        if i in positions:
            frames.append(blank2.copy())
        vid2.write(blank2)
    vid2.release()
    clip3 = vfc(Path(output_dir / "vid2.mp4").as_posix())
    # Next is the text video
    if text is not None:
        # Blank canvas to draw the text onto, sized to match image1
        image = np.zeros(np.shape(image1), dtype="uint8")
        font = cv2.FONT_HERSHEY_COMPLEX
        org = (10, 400)  # bottom-left corner of the first character
        fontScale = 3
        color = (186, 184, 108)  # BGR
        thickness = 4  # line thickness in px
        def text_frames(text, image, org):
            spacing = 55  # horizontal spacing between letters
            blink = image.copy()  # keep a text-free frame for the blink effect
            cv2.imwrite(Path(output_dir / "blink.png").as_posix(), blink)
            for i in range(len(text)):
                text_vid.write(blink)
                # Draw the next character with cv2.putText()
                image = cv2.putText(
                    image, text[i], org, font, fontScale, color, thickness, cv2.LINE_AA
                )
                # Advance the origin, with extra spacing after uppercase letters
                org = (org[0] + spacing, org[1])
                if text[i].isupper():
                    org = (org[0] + spacing + 1, org[1])
                # Save the frame and compile it into the video
                cv2.imwrite(Path(output_dir / f"text_im{i}.png").as_posix(), image)
                text_vid.write(image)
            text_vid.release()

        text_frames(text, image, org)

    return clip1, clip3
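
# A minimal usage sketch (hypothetical inputs): contourfinder returns two
# moviepy clips that animate each image's contours being drawn in.
#
#   intro, outro = contourfinder(
#       cv2.imread("frame_a.png"), cv2.imread("frame_b.png"), output_dir=Path("temp")
#   )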
def load_model(model_name):
    model = interpolator.Interpolator(snapshot_download(repo_id=model_name), None)
    return model
model_names = [
"akhaliq/frame-interpolation-film-style",
"NimaBoscarino/frame-interpolation_film_l1",
"NimaBoscarino/frame_interpolation_film_vgg",
]
models = {model_name: load_model(model_name) for model_name in model_names}
ffmpeg_path = util.get_ffmpeg_path()
mediapy.set_ffmpeg(ffmpeg_path)
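
# For example, the FILM style model can then be looked up by its repo id:
# film_model = models["akhaliq/frame-interpolation-film-style"]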
def resize(width, img):
    basewidth = width
    img = Image.open(img)
    wpercent = basewidth / float(img.size[0])
    hsize = int(float(img.size[1]) * wpercent)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
    img = img.resize((basewidth, hsize), Image.LANCZOS)
    return img
def resize_img(img1, img2, output_dir):
    img_target_size = Image.open(img1)
    img_to_resize = resize_and_crop(
        img2,
        (
            img_target_size.size[0],
            img_target_size.size[1],
        ),  # set width and height to match img1
        crop_origin="middle",
    )
    img_to_resize.save(Path(output_dir / "resized_img2.png"))
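
# A small sketch of the two resize helpers (file names are hypothetical):
#
#   thumb = resize(256, "frame_a.png")  # 256 px wide, aspect ratio preserved
#   thumb.save("frame_a_small.png")
#   # center-crop frame_b.png to frame_a_small.png's size -> temp/resized_img2.png
#   resize_img("frame_a_small.png", "frame_b.png", Path("temp"))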
def get_video_frames(
    # vid_output_dir must be a Path so the `/` joins below work
    images, vid_output_dir=Path("temp"), times_to_interpolate=6, model_name_index=0
):
    frame1 = images[0]
    frame2 = images[1]
    model = models[model_names[model_name_index]]
    cv2_images = [cv2.imread(frame1), cv2.imread(frame2)]
    # Downscale both frames to a 256 px width before interpolating
    frame1 = resize(256, frame1)
    frame2 = resize(256, frame2)
    test_1 = Path(vid_output_dir / "test1.png")
    test_2 = Path(vid_output_dir / "test2.png")
    frame1.save(test_1)
    frame2.save(test_2)
    # Crop the second frame so both inputs share the same dimensions
    resize_img(test_1, test_2, vid_output_dir)
    input_frames = [
        Path(vid_output_dir / "test1.png").as_posix(),
        Path(vid_output_dir / "resized_img2.png").as_posix(),
    ]
    frames = list(
        util.interpolate_recursively_from_files(
            input_frames, times_to_interpolate, model
        )
    )
    return frames, cv2_images
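
# Usage sketch (hypothetical paths): interpolate between two stills with the
# default FILM-style model.
#
#   frames, cv2_images = get_video_frames(
#       ["frame_a.png", "frame_b.png"], vid_output_dir=Path("temp")
#   )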
def create_mp4_with_audio(
    frames, cv2_images, duration, audio, output_path, overlay_image
):
    vid_output_dir = output_path.parent
    temp_vid_path = Path(vid_output_dir / "TEMP.mp4")
    mediapy.write_video(temp_vid_path, frames, fps=10)
    print(
        f"TYPES....{type(cv2_images[0])},{type(cv2_images[1])} SHAPES {cv2_images[0].shape} Img {cv2_images[0]}"
    )
    clip1, clip3 = contourfinder(
        cv2_images[0], cv2_images[1], output_dir=vid_output_dir
    )  # has a third text option
    # Use OpenCV and moviepy code:
    # move from OpenCV video 1, through the interpolated TEMP.mp4, to OpenCV video 2
    clip2 = (
        vfc(temp_vid_path.as_posix())
        .resize(2)
        .set_start(clip1.duration - 0.5)
        .crossfadein(2)
    )
    clip3 = clip3.set_start((clip1.duration - 0.5) + clip2.duration).crossfadein(2)
    new_clip = CompositeVideoClip([clip1, clip2, clip3])
    # Naively attach the audio without matching its length to the video;
    # it has worked so far, so it is left alone
    new_clip.audio = audio
    image = (
        ImageClip(overlay_image).set_duration(duration).resize(0.5).set_pos("center")
    )
    new_clip = new_clip.set_duration(duration)  # set_duration returns a new clip
    # Now overlay the image with moviepy
    final_clip = CompositeVideoClip([new_clip, image])
    final_clip.write_videofile(output_path.as_posix(), audio_codec="aac")
    return output_path.as_posix()
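
# A minimal end-to-end sketch, assuming two stills, a narration track, and an
# overlay PNG exist at these hypothetical paths. This is not part of the
# Space's own flow; it only shows how the pieces above fit together.
if __name__ == "__main__":
    from moviepy.editor import AudioFileClip

    out_dir = Path("temp")
    out_dir.mkdir(exist_ok=True)
    frames, cv2_images = get_video_frames(
        ["frame_a.png", "frame_b.png"], vid_output_dir=out_dir
    )
    narration = AudioFileClip("narration.mp3")
    create_mp4_with_audio(
        frames,
        cv2_images,
        duration=narration.duration,
        audio=narration,
        output_path=out_dir / "final.mp4",
        overlay_image="overlay.png",
    )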