# --- Non-code residue from the web capture this file was scraped from ---
# Spaces: Running on Zero
# File size: 7,964 Bytes | commit a3a3ae4
# (rendered line-number gutter 1-189 omitted)
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "4"
import re
import cv2
import einops
import numpy as np
import torch
import random
import math
from PIL import Image, ImageDraw, ImageFont
import shutil
import glob
from tqdm import tqdm
import subprocess as sp
import argparse
import imageio
import sys
import json
import datetime
import string
from dataset.opencv_transforms.functional import to_tensor, center_crop
from pytorch_lightning import seed_everything
from sgm.util import append_dims
from sgm.util import autocast, instantiate_from_config
from vtdm.model import create_model, load_state_dict
from vtdm.util import tensor2vid, export_to_video
from einops import rearrange
import yaml
import numpy as np
import random
import torch
from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
from basicsr.data.transforms import paired_random_crop
from basicsr.models.sr_model import SRModel
from basicsr.utils import DiffJPEG, USMSharp
from basicsr.utils.img_process_util import filter2D
from basicsr.utils.registry import MODEL_REGISTRY
from torch.nn import functional as F
import torch.nn as nn
class DegradedImages(torch.nn.Module):
    """Real-ESRGAN-style two-stage degradation pipeline for video clips.

    Each clip goes through blur -> random resize -> random noise -> JPEG,
    twice, followed by a final [resize-back + sinc filter] / JPEG pair in a
    random order, producing low-quality counterparts of the ground-truth
    frames (e.g. for building LQ/GT training pairs).

    Degradation hyper-parameters (probabilities, ranges, scale factor) are
    read from ``configs/train_realesrnet_x4plus.yml``.
    """

    def __init__(self, freeze=True):
        # NOTE(review): `freeze` is accepted but never used — kept only for
        # caller compatibility; confirm whether it can be dropped.
        super().__init__()
        with open('configs/train_realesrnet_x4plus.yml', mode='r') as f:
            opt = yaml.load(f, Loader=yaml.FullLoader)
        self.opt = opt
        # JPEG simulator is built lazily on first forward() and then reused;
        # the previous code re-instantiated it on every call.
        self.jpeger = None

    def _pick_rescale_factor(self, prob_key, range_key):
        """Draw a random up/down/keep resize factor per the config probabilities."""
        updown_type = random.choices(['up', 'down', 'keep'], self.opt[prob_key])[0]
        if updown_type == 'up':
            return np.random.uniform(1, self.opt[range_key][1])
        if updown_type == 'down':
            return np.random.uniform(self.opt[range_key][0], 1)
        return 1

    def _add_random_noise(self, out, gauss_prob_key, sigma_key, poisson_key, gray_key):
        """Add Gaussian or Poisson noise, chosen at random per the config probability."""
        gray_noise_prob = self.opt[gray_key]
        if np.random.uniform() < self.opt[gauss_prob_key]:
            return random_add_gaussian_noise_pt(
                out, sigma_range=self.opt[sigma_key], clip=True, rounds=False,
                gray_prob=gray_noise_prob)
        return random_add_poisson_noise_pt(
            out,
            scale_range=self.opt[poisson_key],
            gray_prob=gray_noise_prob,
            clip=True,
            rounds=False)

    def _jpeg_compress(self, out, range_key):
        """Apply simulated JPEG compression with a random per-frame quality."""
        jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt[range_key])
        # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
        out = torch.clamp(out, 0, 1)
        return self.jpeger(out, quality=jpeg_p)

    @autocast
    @torch.no_grad()
    def forward(self, images, videos, masks, kernel1s, kernel2s, sinc_kernels):
        '''
        images: (B, 3, H, W) in [-1, 1] — pristine reference frames; each clip's
            first output frame is replaced with its reference image.
        videos: (B, 3, T, H, W) in [-1, 1] — ground-truth clips
            (e.g. (2, 3, 16, 1024, 1024)).
        masks: (B, T, H, W) — pixels where mask == 0 are forced to white.
        kernel1s, kernel2s, sinc_kernels: (B, T, 21, 21) blur kernels.

        Returns: (B*T, 3, H, W) degraded frames in [-1, 1].
        '''
        if self.jpeger is None:
            # Built once and cached (fixes per-call re-allocation).
            self.jpeger = DiffJPEG(differentiable=False).cuda()
        B, C, H, W = images.shape
        ori_h, ori_w = videos.size()[3:5]
        videos = videos / 2.0 + 0.5  # [-1, 1] -> [0, 1]
        videos = rearrange(videos, 'b c t h w -> b t c h w')  # (B, T, 3, H, W)
        all_lqs = []
        for i in range(B):
            kernel1 = kernel1s[i]
            kernel2 = kernel2s[i]
            sinc_kernel = sinc_kernels[i]
            gt = videos[i]    # (T, 3, H, W)
            mask = masks[i]   # (T, H, W)

            # ----------------------- The first degradation process ----------------------- #
            out = filter2D(gt, kernel1)  # blur
            scale = self._pick_rescale_factor('resize_prob', 'resize_range')
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, scale_factor=scale, mode=mode)
            out = self._add_random_noise(out, 'gaussian_noise_prob', 'noise_range',
                                         'poisson_scale_range', 'gray_noise_prob')
            out = self._jpeg_compress(out, 'jpeg_range')

            # ----------------------- The second degradation process ----------------------- #
            if np.random.uniform() < self.opt['second_blur_prob']:
                out = filter2D(out, kernel2)
            scale = self._pick_rescale_factor('resize_prob2', 'resize_range2')
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(
                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
            out = self._add_random_noise(out, 'gaussian_noise_prob2', 'noise_range2',
                                         'poisson_scale_range2', 'gray_noise_prob2')

            # JPEG compression + the final sinc filter.
            # [resize back + sinc filter] is grouped as one operation and placed
            # either before or after JPEG at random; other combinations
            # (sinc + JPEG + resize) empirically introduce twisted lines.
            if np.random.uniform() < 0.5:
                # resize back + the final sinc filter, then JPEG
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, sinc_kernel)
                out = self._jpeg_compress(out, 'jpeg_range2')
            else:
                # JPEG, then resize back + the final sinc filter
                out = self._jpeg_compress(out, 'jpeg_range2')
                mode = random.choice(['area', 'bilinear', 'bicubic'])
                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
                out = filter2D(out, sinc_kernel)

            # Clamp and quantize to 8-bit levels, then resize up to the original resolution.
            lqs = torch.clamp((out * 255.0).round(), 0, 255) / 255.
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            lqs = F.interpolate(lqs, size=(ori_h, ori_w), mode=mode)  # (T, 3, H, W)
            lqs = rearrange(lqs, 't c h w -> t h w c')  # (T, H, W, 3)
            # White-out masked regions; iterate over the actual frame count
            # instead of a hard-coded 16 (fixes clips with T != 16 frames).
            for j in range(lqs.size(0)):
                lqs[j][mask[j] == 0] = 1.0
            all_lqs.append(lqs)

        all_lqs = [(f - 0.5) * 2.0 for f in all_lqs]  # [0, 1] -> [-1, 1]
        all_lqs = torch.stack(all_lqs, 0)  # (B, T, H, W, 3)
        all_lqs = rearrange(all_lqs, 'b t h w c -> b t c h w')
        # Keep the first frame of each clip pristine (reference image).
        for i in range(B):
            all_lqs[i][0] = images[i]
        all_lqs = rearrange(all_lqs, 'b t c h w -> (b t) c h w')
        return all_lqs