import cv2
import numpy as np
import torch


def ssim(img1, img2):
    """Return the mean SSIM between two single-channel images.

    Local statistics are computed with an 11x11 Gaussian window (sigma 1.5).
    The stabilising constants are ``0.01**2`` and ``0.03**2``, i.e. they are
    not scaled by a dynamic range, which matches inputs normalised to [0, 1].

    Args:
        img1: 2-D NumPy array (height, width).
        img2: 2-D NumPy array with the same shape as ``img1``.

    Returns:
        The mean of the SSIM map over the region where the window fits
        entirely inside the image. If either spatial dimension is smaller
        than 11 that region is empty and the mean of an empty array (NaN)
        is returned.
    """
    C1 = 0.01**2
    C2 = 0.03**2

    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())

    def _windowed(img):
        # Drop 5 pixels on every side so only "valid" positions remain,
        # i.e. those where the 11x11 window never touches the padded border.
        return cv2.filter2D(img, -1, window)[5:-5, 5:-5]

    mu1 = _windowed(img1)
    mu2 = _windowed(img2)
    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # Var[x] = E[x^2] - E[x]^2 and Cov[x, y] = E[xy] - E[x]E[y].
    sigma1_sq = _windowed(img1**2) - mu1_sq
    sigma2_sq = _windowed(img2**2) - mu2_sq
    sigma12 = _windowed(img1 * img2) - mu1_mu2

    ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
        (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
    )
    return ssim_map.mean()


def calculate_ssim_function(img1, img2):
    """Return the SSIM of two images, averaging over channels when present.

    Inputs are expected in [0, 1]. SSIM is the only metric here that is
    extremely sensitive to a gray image being compared to a black/white one.

    Args:
        img1: NumPy array of shape (h, w), (1, h, w) or (3, h, w).
        img2: NumPy array with the same shape as ``img1``.

    Returns:
        The SSIM value; for 3-channel input, the mean of the per-channel SSIM.

    Raises:
        ValueError: If the shapes differ, if the input is not 2-D or 3-D, or
            if a 3-D input does not have 1 or 3 channels on its first axis.
    """
    if img1.shape != img2.shape:
        raise ValueError("Input images must have the same dimensions.")

    if img1.ndim == 2:
        return ssim(img1, img2)

    if img1.ndim == 3:
        channels = img1.shape[0]
        if channels == 3:
            per_channel = [ssim(img1[i], img2[i]) for i in range(3)]
            return np.array(per_channel).mean()
        if channels == 1:
            return ssim(img1[0], img2[0])
        # Previously this case fell through and returned None, which only
        # surfaced later as an obscure failure when aggregating the results.
        raise ValueError(
            f"Wrong input image dimensions: expected 1 or 3 channels on the "
            f"first axis, got {channels}."
        )

    raise ValueError("Wrong input image dimensions.")


def trans(x):
    """Return ``x`` unchanged (placeholder hook for input preprocessing)."""
    return x


def _to_numpy(x):
    """Convert a tensor or array-like to a NumPy array on the host."""
    if isinstance(x, torch.Tensor):
        # detach() and cpu() make this safe for tensors that require grad
        # or that live on an accelerator; both are no-ops otherwise.
        return x.detach().cpu().numpy()
    return np.asarray(x)


def calculate_ssim(videos1, videos2):
    """Compute per-timestamp SSIM statistics over a batch of video pairs.

    Args:
        videos1: Tensor (or array) of shape
            [batch_size, timestamps, channel, h, w].
        videos2: Tensor (or array) with the same shape as ``videos1``.

    Returns:
        A dict with:
            "value": {timestamp: mean SSIM over the batch},
            "value_std": {timestamp: standard deviation over the batch},
            "video_setting": shape of one video (timestamps, channel, h, w),
            "video_setting_name": description of the axes in "video_setting".

    Raises:
        AssertionError: If the two inputs have different shapes.
        ValueError: If the batch is empty, or if a frame has an unsupported
            shape (see ``calculate_ssim_function``).
    """
    assert videos1.shape == videos2.shape

    videos1 = trans(videos1)
    videos2 = trans(videos2)

    num_videos = videos1.shape[0]
    num_timestamps = videos1.shape[1]
    if num_videos == 0:
        raise ValueError("calculate_ssim needs at least one video in the batch.")

    ssim_results = []
    for video_num in range(num_videos):
        # video: [timestamps, channel, h, w], converted once per video.
        video1 = _to_numpy(videos1[video_num])
        video2 = _to_numpy(videos2[video_num])
        # frame: [channel, h, w]
        ssim_results.append(
            [
                calculate_ssim_function(video1[t], video2[t])
                for t in range(num_timestamps)
            ]
        )

    # Shape: [batch_size, timestamps]
    ssim_results = np.array(ssim_results)

    ssim_mean = {
        t: np.mean(ssim_results[:, t]) for t in range(num_timestamps)
    }
    ssim_std = {
        t: np.std(ssim_results[:, t]) for t in range(num_timestamps)
    }

    result = {
        "value": ssim_mean,
        "value_std": ssim_std,
        "video_setting": videos1.shape[1:],
        # Spelling kept as-is so emitted results stay byte-compatible.
        "video_setting_name": "time, channel, heigth, width",
    }

    return result


# test code / using example


def main():
    """Run ``calculate_ssim`` on two all-zero video batches and print the result."""
    NUMBER_OF_VIDEOS = 8
    VIDEO_LENGTH = 50
    CHANNEL = 3
    SIZE = 64
    videos1 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)
    videos2 = torch.zeros(NUMBER_OF_VIDEOS, VIDEO_LENGTH, CHANNEL, SIZE, SIZE, requires_grad=False)

    import json

    result = calculate_ssim(videos1, videos2)
    print(json.dumps(result, indent=4))


if __name__ == "__main__":
    main()