'''
This file is the whole dataset curation pipeline: it collects the least compressed and the most informative frames from video sources.
'''
import argparse
import os
import shutil
import subprocess
import sys
import time

import cv2
import torch

# Import files from the local folder
root_path = os.path.abspath('.')
sys.path.append(root_path)
from opt import opt
from dataset_curation_pipeline.IC9600.gene import infer_one_image
from dataset_curation_pipeline.IC9600.ICNet import ICNet
class video_scoring:
    ''' Score image files with the ICNet (IC9600) model and copy the highest scored ones to a folder.
    '''

    def __init__(self, IC9600_pretrained_weight_path, verbose=None) -> None:
        ''' Load the scoring model.
        Args:
            IC9600_pretrained_weight_path (str): The path of the pretrained ICNet weight
            verbose (bool):     Whether we print log message. When it is None, a module-level
                                variable named "verbose" is used if one exists (the script entry
                                point of this file defines one); otherwise it is False.
        '''
        # Init the model (weights are loaded on CPU first and the model is then moved to GPU)
        self.scorer = ICNet()
        self.scorer.load_state_dict(torch.load(IC9600_pretrained_weight_path, map_location=torch.device('cpu')))
        self.scorer.eval().cuda()

        # Resolve the verbosity once here, so select_frame never depends on a global that may not exist
        if verbose is None:
            verbose = bool(globals().get("verbose", False))
        self.verbose = verbose


    def select_frame(self, skip_num, img_lists, target_frame_num, save_dir, output_name_head, partition_idx):
        ''' Execution of scoring to the images in img_lists and copy the top scored ones to save_dir
        Args:
            skip_num (int):             Only 1 in skip_num will be chosen to accelerate.
            img_lists (list):           The paths of all image files we want to process
            target_frame_num (int):     The number of frames we need to choose
            save_dir (str):             The path where we save those images
            output_name_head (str):     This is the input video name head
            partition_idx (int):        The partition idx
        '''

        stores = []
        for idx, image_path in enumerate(sorted(img_lists)):
            if idx % skip_num != 0:
                # We only process 1 in skip_num to accelerate and also prevent minor case of repeated scene.
                continue

            # Evaluate the image complexity score for this image
            score = infer_one_image(self.scorer, image_path)
            if self.verbose:
                print(image_path, score)
            stores.append((score, image_path))

        # Nothing to select: either no frame was scored or no frame is requested.
        # (Without this guard, stores[-0:] would return every frame and selected[0] would fail on an empty list.)
        if target_frame_num <= 0 or not stores:
            return

        # Find the top most scores' images (ascending sort, so the best ones are at the tail)
        stores.sort(key=lambda x: x[0])
        selected = stores[-target_frame_num:]
        if self.verbose:
            print("The lowest selected score is ", selected[0])     # This is a kind of info

        # Store the selected images
        for idx, (score, img_path) in enumerate(selected):
            output_name = output_name_head + "_" + str(partition_idx) + "_" + str(idx) + ".png"
            output_path = os.path.join(save_dir, output_name)
            shutil.copyfile(img_path, output_path)


    def run(self, skip_num, img_folder, target_frame_num, save_dir, output_name_head, partition_num):
        ''' Split the images of img_folder to partitions and select the top scored frames of each partition
        Args:
            skip_num (int):             Only 1 in skip_num will be chosen to accelerate.
            img_folder (str):           The image folder of all I-Frames we need to process
            target_frame_num (int):     The number of frames we need to choose (must be a multiple of partition_num)
            save_dir (str):             The path where we save those images
            output_name_head (str):     This is the input video name head
            partition_num (int):        The number of partition we want to crop the video to
        '''
        assert target_frame_num % partition_num == 0, "target_frame_num must be a multiple of partition_num"

        img_lists = [os.path.join(img_folder, img_name) for img_name in sorted(os.listdir(img_folder))]
        unit_length = len(img_lists) // partition_num
        target_partition_num = target_frame_num // partition_num

        # Cut the folder to several partition and select those with the highest score
        # NOTE: the last (len(img_lists) % partition_num) images do not belong to any partition
        for idx in range(partition_num):
            select_lists = img_lists[unit_length*idx : unit_length*(idx+1)]
            self.select_frame(skip_num, select_lists, target_partition_num, save_dir, output_name_head, idx)
class frame_collector:
    ''' Extract the I-Frames of every video in a folder and keep the top scored frames of each video.
    '''

    def __init__(self, IC9600_pretrained_weight_path, verbose) -> None:
        '''
        Args:
            IC9600_pretrained_weight_path (str): The path of the pretrained weight given to video_scoring
            verbose (bool):     Whether we print log message
        '''
        self.scoring = video_scoring(IC9600_pretrained_weight_path)
        self.verbose = verbose


    def video_split_by_IFrame(self, video_path, tmp_path):
        ''' Split the video to its I-Frames format
        Args:
            video_path (str):       The directory to a single video
            tmp_path (str):         A temporary working places to work and will be delete at the end
        '''

        # Prepare the work folder needed (always start from an empty folder)
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        os.makedirs(tmp_path)

        # Split Video I-frame
        # The command is an argument list (no shell), so paths with spaces or shell characters are passed as-is.
        # The filter is a raw string: ffmpeg must receive the backslash that escapes the comma.
        cmd = [
            "ffmpeg",
            "-i", video_path,
            "-loglevel", "error",
            "-vf", r"select=eq(pict_type\,I)",
            "-vsync", "2",
            "-f", "image2",
            "-q:v", "1",
            os.path.join(tmp_path, "image-%06d.png"),       # Six-digit zero-padded frame index
        ]
        if self.verbose:
            print(" ".join(cmd))

        # Stay best-effort like before: a failing ffmpeg is reported but it does not stop the pipeline
        try:
            result = subprocess.run(cmd, check=False)
        except OSError as error:
            print("Failed to launch ffmpeg: " + str(error))
            return
        if result.returncode != 0:
            print("ffmpeg exited with code " + str(result.returncode) + " for " + video_path)


    def collect_frames(self, video_folder_dir, save_dir, tmp_path, skip_num, target_frames, partition_num):
        ''' Automatically collect frames from the video dir
        Args:
            video_folder_dir (str):     The directory of all videos input
            save_dir (str):             The directory we will store the selected frames
            tmp_path (str):             A temporary working places to work and will be delete at the end
            skip_num (int):             Only 1 in skip_num will be chosen to accelerate.
            target_frames (list):       [# of frames for video under 30 min, # of frames for video over 30 min]
            partition_num (int):        The number of partition we want to crop the video to
        '''

        # Iterate all video under video_folder_dir
        for video_name in sorted(os.listdir(video_folder_dir)):
            # Sanity check for this video file format
            # Split on the last dot only, so a name like "a.b.mp4" gives the head "a.b".
            # A name without any dot is skipped; an empty extension (name ending with ".") is accepted.
            output_name_head, dot, extension = video_name.rpartition('.')
            if not dot or extension not in ('mp4', 'mkv', ''):
                continue

            # Get info of this video
            video_path = os.path.join(video_folder_dir, video_name)
            duration = get_duration(video_path)     # unit in minutes
            print("We are processing " + video_path + " with duration " + str(duration) + " min")

            # Split the video to I-frame
            self.video_split_by_IFrame(video_path, tmp_path)

            # Score the frames and select those top scored frames we need
            if duration <= 30:
                target_frame_num = target_frames[0]
            else:
                target_frame_num = target_frames[1]

            self.scoring.run(skip_num, tmp_path, target_frame_num, save_dir, output_name_head, partition_num)

            # tmp_path is not removed here: video_split_by_IFrame empties it before the next video is split
def get_duration(filename):
    ''' Get the duration of a video from its frame count and its fps
    Args:
        filename (str):     The path of the video file
    Returns:
        minutes (int):      The duration in minutes, truncated to an integer.
                            0 is returned when the reported fps is not positive.
    '''
    video = cv2.VideoCapture(filename)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
    finally:
        # Release the capture handle explicitly instead of waiting for garbage collection
        video.release()

    # No valid fps is reported: avoid a division by zero and treat the video as 0 min
    if fps <= 0:
        return 0

    seconds = frame_count / fps
    minutes = int(seconds / 60)
    return minutes
if __name__ == "__main__":

    def _str2bool(value):
        ''' Parse a command line text like "True" / "false" / "1" / "0" to a bool.
            (argparse with type=bool treats every non-empty text, including "False", as True)
        '''
        lowered = value.strip().lower()
        if lowered in ("1", "true", "t", "yes", "y", "on"):
            return True
        if lowered in ("0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("Expected a boolean value, but got " + repr(value))

    # Fundamental setting
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_folder_dir', type = str, default = '../anime_videos', help = "A folder with video sources")
    parser.add_argument('--IC9600_pretrained_weight_path', type = str, default = "pretrained/ck.pth", help = "The pretrained IC9600 weight")
    parser.add_argument('--save_dir', type = str, default = 'APISR_dataset', help = "The folder to store filtered dataset")
    parser.add_argument('--skip_num', type = int, default = 5, help = "Only 1 in skip_num will be chosen in sequential I-frames to accelerate.")
    # Two integers, e.g. "--target_frames 16 24" (type=list would split the text to single characters)
    parser.add_argument('--target_frames', type = int, nargs = 2, default = [16, 24], help = "[# of frames for video under 30 min, # of frames for video over 30 min]")
    parser.add_argument('--partition_num', type = int, default = 8, help = "The number of partition we want to crop the video to, to increase diversity of sampling")
    parser.add_argument('--verbose', type = _str2bool, default = True, help = "Whether we print log message")
    args = parser.parse_args()

    # Transform to variable
    video_folder_dir = args.video_folder_dir
    IC9600_pretrained_weight_path = args.IC9600_pretrained_weight_path
    save_dir = args.save_dir
    skip_num = args.skip_num
    target_frames = args.target_frames      # [# of frames for video under 30 min, # of frames for video over 30 min]
    partition_num = args.partition_num
    # Keep this as a module-level name: the frame scoring code looks up "verbose" in the module globals
    verbose = args.verbose

    # Secondary setting
    tmp_path = "tmp_dataset"

    # Prepare (an existing save_dir is deleted and recreated)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    # Process
    start = time.time()
    try:
        obj = frame_collector(IC9600_pretrained_weight_path, verbose)
        obj.collect_frames(video_folder_dir, save_dir, tmp_path, skip_num, target_frames, partition_num)
    finally:
        # The tmp folder only exists if at least one video was split; clean it up on failure too
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)

    total_time = (time.time() - start)//60
    print("Total time spent is {} min".format(total_time))