Spaces: Running
vijul.shah committed · Commit 0f2d9f6 · 1 Parent(s): 51ba5d6
End-to-End Pipeline Configured
Browse files
- .gitignore +1 -1
- SR_Inference/codeformer/codeformer_arch.py +271 -0
- SR_Inference/codeformer/vqgan_arch.py +418 -0
- SR_Inference/codeformer/weights/codeformer_v0.1.0.pth +3 -0
- SR_Inference/gfpgan/weights/GFPGANv1.3.pth +3 -0
- SR_Inference/gfpgan/weights/detection_Resnet50_Final.pth +3 -0
- SR_Inference/gfpgan/weights/parsing_parsenet.pth +3 -0
- SR_Inference/hat/hat_arch.py +979 -0
- SR_Inference/hat/weights/HAT-L_SRx2_ImageNet-pretrain.pth +3 -0
- SR_Inference/hat/weights/HAT_SRx2_ImageNet-pretrain.pth +3 -0
- SR_Inference/hat/weights/HAT_SRx4_ImageNet-pretrain.pth +3 -0
- SR_Inference/inference_codeformer.py +126 -0
- SR_Inference/inference_gfpgan.py +76 -0
- SR_Inference/inference_hat.py +104 -0
- SR_Inference/inference_realesr.py +52 -0
- SR_Inference/inference_sr_utils.py +101 -0
- SR_Inference/inference_srresnet.py +78 -0
- SR_Inference/realesrgan/weights/RealESRGAN_x2plus.pth +3 -0
- SR_Inference/realesrgan/weights/RealESRGAN_x4plus.pth +3 -0
- SR_Inference/srresnet/weights/SRResNet_2x.pth +3 -0
- SR_Inference/srresnet/weights/SRResNet_4x.pth +3 -0
- app.py +258 -121
- config.yml +3 -3
- feature_extraction/extractor_mediapipe.py +365 -0
- feature_extraction/features_extractor.py +50 -0
- packages.txt +0 -0
- preprocessing/dataset_creation.py +40 -0
- preprocessing/dataset_creation_utils.py +29 -0
- registrations/models.py +1 -1
- registry_utils.py +5 -5
- requirements.txt +3 -2
- xx-packages.txt +28 -0
.gitignore
CHANGED
@@ -1 +1 @@
-__pycache__
+__pycache__
SR_Inference/codeformer/codeformer_arch.py
ADDED
@@ -0,0 +1,271 @@
import math
import torch
from .vqgan_arch import *
from typing import Optional
from torch import nn, Tensor
import torch.nn.functional as F

def calc_mean_std(feat, eps=1e-5):
    """Calculate mean and std for adaptive_instance_normalization.

    Args:
        feat (Tensor): 4D tensor.
        eps (float): A small value added to the variance to avoid
            divide-by-zero. Default: 1e-5.
    """
    size = feat.size()
    assert len(size) == 4, 'The input feature should be 4D tensor.'
    b, c = size[:2]
    feat_var = feat.view(b, c, -1).var(dim=2) + eps
    feat_std = feat_var.sqrt().view(b, c, 1, 1)
    feat_mean = feat.view(b, c, -1).mean(dim=2).view(b, c, 1, 1)
    return feat_mean, feat_std

def adaptive_instance_normalization(content_feat, style_feat):
    """Adaptive instance normalization.

    Adjust the reference features to have the similar color and illuminations
    as those in the degradate features.

    Args:
        content_feat (Tensor): The reference feature.
        style_feat (Tensor): The degradate features.
    """
    size = content_feat.size()
    style_mean, style_std = calc_mean_std(style_feat)
    content_mean, content_std = calc_mean_std(content_feat)
    normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
    return normalized_feat * style_std.expand(size) + style_mean.expand(size)

class PositionEmbeddingSine(nn.Module):
    """
    This is a more standard version of the position embedding, very similar to the one
    used by the Attention is all you need paper, generalized to work on images.
    """

    def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.temperature = temperature
        self.normalize = normalize
        if scale is not None and normalize is False:
            raise ValueError("normalize should be True if scale is passed")
        if scale is None:
            scale = 2 * math.pi
        self.scale = scale

    def forward(self, x, mask=None):
        if mask is None:
            mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
        not_mask = ~mask
        y_embed = not_mask.cumsum(1, dtype=torch.float32)
        x_embed = not_mask.cumsum(2, dtype=torch.float32)
        if self.normalize:
            eps = 1e-6
            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale

        dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
        dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)

        pos_x = x_embed[:, :, :, None] / dim_t
        pos_y = y_embed[:, :, :, None] / dim_t
        pos_x = torch.stack(
            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos_y = torch.stack(
            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
        ).flatten(3)
        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
        return pos

def _get_activation_fn(activation):
    """Return an activation function given a string"""
    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return F.gelu
    if activation == "glu":
        return F.glu
    raise RuntimeError(F"activation should be relu/gelu, not {activation}.")

class TransformerSALayer(nn.Module):
    def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
        # Implementation of Feedforward model - MLP
        self.linear1 = nn.Linear(embed_dim, dim_mlp)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(dim_mlp, embed_dim)

        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.activation = _get_activation_fn(activation)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

    def forward(self, tgt,
                tgt_mask: Optional[Tensor] = None,
                tgt_key_padding_mask: Optional[Tensor] = None,
                query_pos: Optional[Tensor] = None):

        # self attention
        tgt2 = self.norm1(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
        tgt2 = self.self_attn(q, k, value=tgt2, attn_mask=tgt_mask,
                              key_padding_mask=tgt_key_padding_mask)[0]
        tgt = tgt + self.dropout1(tgt2)

        # ffn
        tgt2 = self.norm2(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
        tgt = tgt + self.dropout2(tgt2)
        return tgt

class Fuse_sft_block(nn.Module):
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.encode_enc = ResBlock(2*in_ch, out_ch)

        self.scale = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))

        self.shift = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))

    def forward(self, enc_feat, dec_feat, w=1):
        enc_feat = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
        scale = self.scale(enc_feat)
        shift = self.shift(enc_feat)
        residual = w * (dec_feat * scale + shift)
        out = dec_feat + residual
        return out

class CodeFormerArch(VQAutoEncoder):
    def __init__(self, dim_embd=512, n_head=8, n_layers=9,
                 codebook_size=1024, latent_size=256,
                 connect_list=['32', '64', '128', '256'],
                 fix_modules=['quantize','generator'], vqgan_path=None):
        super(CodeFormerArch, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size)

        if vqgan_path is not None:
            self.load_state_dict(
                torch.load(vqgan_path, map_location='cpu')['params_ema'])

        if fix_modules is not None:
            for module in fix_modules:
                for param in getattr(self, module).parameters():
                    param.requires_grad = False

        self.connect_list = connect_list
        self.n_layers = n_layers
        self.dim_embd = dim_embd
        self.dim_mlp = dim_embd*2

        self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))
        self.feat_emb = nn.Linear(256, self.dim_embd)

        # transformer
        self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
                                         for _ in range(self.n_layers)])

        # logits_predict head
        self.idx_pred_layer = nn.Sequential(
            nn.LayerNorm(dim_embd),
            nn.Linear(dim_embd, codebook_size, bias=False))

        self.channels = {
            '16': 512,
            '32': 256,
            '64': 256,
            '128': 128,
            '256': 128,
            '512': 64,
        }

        # after second residual block for > 16, before attn layer for ==16
        self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18}
        # after first residual block for > 16, before attn layer for ==16
        self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21}

        # fuse_convs_dict
        self.fuse_convs_dict = nn.ModuleDict()
        for f_size in self.connect_list:
            in_ch = self.channels[f_size]
            self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)

    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=0.02)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, x, w=0, detach_16=True, code_only=False, adain=False):
        # ################### Encoder #####################
        enc_feat_dict = {}
        out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
        for i, block in enumerate(self.encoder.blocks):
            x = block(x)
            if i in out_list:
                enc_feat_dict[str(x.shape[-1])] = x.clone()

        lq_feat = x
        # ################# Transformer ###################
        # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
        pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1)
        # BCHW -> BC(HW) -> (HW)BC
        feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1))
        query_emb = feat_emb
        # Transformer encoder
        for layer in self.ft_layers:
            query_emb = layer(query_emb, query_pos=pos_emb)

        # output logits
        logits = self.idx_pred_layer(query_emb) # (hw)bn
        logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n

        if code_only: # for training stage II
            # logits doesn't need softmax before cross_entropy loss
            return logits, lq_feat

        # ################# Quantization ###################
        # if self.training:
        #     quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
        #     # b(hw)c -> bc(hw) -> bchw
        #     quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
        # ------------
        soft_one_hot = F.softmax(logits, dim=2)
        _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
        quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[x.shape[0],16,16,256])
        # preserve gradients
        # quant_feat = lq_feat + (quant_feat - lq_feat).detach()

        if detach_16:
            quant_feat = quant_feat.detach() # for training stage III
        if adain:
            quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)

        # ################## Generator ####################
        x = quant_feat
        fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]

        for i, block in enumerate(self.generator.blocks):
            x = block(x)
            if i in fuse_list: # fuse after i-th block
                f_size = str(x.shape[-1])
                if w>0:
                    x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
        out = x
        # logits doesn't need softmax before cross_entropy loss
        return out, logits, lq_feat
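For reference, a minimal usage sketch of the class added above (not part of the commit). It assumes SR_Inference/codeformer is importable as a package and that the bundled codeformer_v0.1.0.pth checkpoint stores its weights under 'params_ema' (the same key the class itself expects for vqgan_path); the input tensor is a stand-in for a normalized 512x512 face crop.

import torch
from SR_Inference.codeformer.codeformer_arch import CodeFormerArch

net = CodeFormerArch(dim_embd=512, n_head=8, n_layers=9,
                     codebook_size=1024, connect_list=['32', '64', '128', '256'])
ckpt = torch.load('SR_Inference/codeformer/weights/codeformer_v0.1.0.pth', map_location='cpu')
net.load_state_dict(ckpt['params_ema'])  # assumed checkpoint layout
net.eval()

with torch.no_grad():
    face = torch.randn(1, 3, 512, 512)                        # dummy normalized face crop
    restored, logits, lq_feat = net(face, w=0.5, adain=True)  # w trades fidelity vs. quality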
SR_Inference/codeformer/vqgan_arch.py
ADDED
@@ -0,0 +1,418 @@
'''
VQGAN code, adapted from the original created by the Unleashing Transformers authors:
https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py

'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from basicsr.utils import get_root_logger

def normalize(in_channels):
    return torch.nn.GroupNorm(num_groups=32, num_channels=in_channels, eps=1e-6, affine=True)

@torch.jit.script
def swish(x):
    return x*torch.sigmoid(x)

# Define VQVAE classes
class VectorQuantizer(nn.Module):
    def __init__(self, codebook_size, emb_dim, beta):
        super(VectorQuantizer, self).__init__()
        self.codebook_size = codebook_size  # number of embeddings
        self.emb_dim = emb_dim  # dimension of embedding
        self.beta = beta  # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
        self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
        self.embedding.weight.data.uniform_(-1.0 / self.codebook_size, 1.0 / self.codebook_size)

    def forward(self, z):
        # reshape z -> (batch, height, width, channel) and flatten
        z = z.permute(0, 2, 3, 1).contiguous()
        z_flattened = z.view(-1, self.emb_dim)

        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
        d = (z_flattened ** 2).sum(dim=1, keepdim=True) + (self.embedding.weight**2).sum(1) - \
            2 * torch.matmul(z_flattened, self.embedding.weight.t())

        mean_distance = torch.mean(d)
        # find closest encodings
        min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1)
        # min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False)
        # [0-1], higher score, higher confidence
        # min_encoding_scores = torch.exp(-min_encoding_scores/10)

        min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z)
        min_encodings.scatter_(1, min_encoding_indices, 1)

        # get quantized latent vectors
        z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape)
        # compute loss for embedding
        loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2)
        # preserve gradients
        z_q = z + (z_q - z).detach()

        # perplexity
        e_mean = torch.mean(min_encodings, dim=0)
        perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
        # reshape back to match original input shape
        z_q = z_q.permute(0, 3, 1, 2).contiguous()

        return z_q, loss, {
            "perplexity": perplexity,
            "min_encodings": min_encodings,
            "min_encoding_indices": min_encoding_indices,
            "mean_distance": mean_distance
            }

    def get_codebook_feat(self, indices, shape):
        # input indices: batch*token_num -> (batch*token_num)*1
        # shape: batch, height, width, channel
        indices = indices.view(-1,1)
        min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices)
        min_encodings.scatter_(1, indices, 1)
        # get quantized latent vectors
        z_q = torch.matmul(min_encodings.float(), self.embedding.weight)

        if shape is not None:  # reshape back to match original input shape
            z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous()

        return z_q

class GumbelQuantizer(nn.Module):
    def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0):
        super().__init__()
        self.codebook_size = codebook_size  # number of embeddings
        self.emb_dim = emb_dim  # dimension of embedding
        self.straight_through = straight_through
        self.temperature = temp_init
        self.kl_weight = kl_weight
        self.proj = nn.Conv2d(num_hiddens, codebook_size, 1)  # projects last encoder layer to quantized logits
        self.embed = nn.Embedding(codebook_size, emb_dim)

    def forward(self, z):
        hard = self.straight_through if self.training else True

        logits = self.proj(z)

        soft_one_hot = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=hard)

        z_q = torch.einsum("b n h w, n d -> b d h w", soft_one_hot, self.embed.weight)

        # + kl divergence to the prior loss
        qy = F.softmax(logits, dim=1)
        diff = self.kl_weight * torch.sum(qy * torch.log(qy * self.codebook_size + 1e-10), dim=1).mean()
        min_encoding_indices = soft_one_hot.argmax(dim=1)

        return z_q, diff, {
            "min_encoding_indices": min_encoding_indices
            }

class Downsample(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.conv = torch.nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)

    def forward(self, x):
        pad = (0, 1, 0, 1)
        x = torch.nn.functional.pad(x, pad, mode="constant", value=0)
        x = self.conv(x)
        return x

class Upsample(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        x = F.interpolate(x, scale_factor=2.0, mode="nearest")
        x = self.conv(x)

        return x

class ResBlock(nn.Module):
    def __init__(self, in_channels, out_channels=None):
        super(ResBlock, self).__init__()
        self.in_channels = in_channels
        self.out_channels = in_channels if out_channels is None else out_channels
        self.norm1 = normalize(in_channels)
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.norm2 = normalize(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        if self.in_channels != self.out_channels:
            self.conv_out = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x_in):
        x = x_in
        x = self.norm1(x)
        x = swish(x)
        x = self.conv1(x)
        x = self.norm2(x)
        x = swish(x)
        x = self.conv2(x)
        if self.in_channels != self.out_channels:
            x_in = self.conv_out(x_in)

        return x + x_in

class AttnBlock(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = in_channels

        self.norm = normalize(in_channels)
        self.q = torch.nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=1,
            stride=1,
            padding=0
        )
        self.k = torch.nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=1,
            stride=1,
            padding=0
        )
        self.v = torch.nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=1,
            stride=1,
            padding=0
        )
        self.proj_out = torch.nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=1,
            stride=1,
            padding=0
        )

    def forward(self, x):
        h_ = x
        h_ = self.norm(h_)
        q = self.q(h_)
        k = self.k(h_)
        v = self.v(h_)

        # compute attention
        b, c, h, w = q.shape
        q = q.reshape(b, c, h*w)
        q = q.permute(0, 2, 1)
        k = k.reshape(b, c, h*w)
        w_ = torch.bmm(q, k)
        w_ = w_ * (int(c)**(-0.5))
        w_ = F.softmax(w_, dim=2)

        # attend to values
        v = v.reshape(b, c, h*w)
        w_ = w_.permute(0, 2, 1)
        h_ = torch.bmm(v, w_)
        h_ = h_.reshape(b, c, h, w)

        h_ = self.proj_out(h_)

        return x+h_

class Encoder(nn.Module):
    def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions):
        super().__init__()
        self.nf = nf
        self.num_resolutions = len(ch_mult)
        self.num_res_blocks = num_res_blocks
        self.resolution = resolution
        self.attn_resolutions = attn_resolutions

        curr_res = self.resolution
        in_ch_mult = (1,)+tuple(ch_mult)

        blocks = []
        # initial convultion
        blocks.append(nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1))

        # residual and downsampling blocks, with attention on smaller res (16x16)
        for i in range(self.num_resolutions):
            block_in_ch = nf * in_ch_mult[i]
            block_out_ch = nf * ch_mult[i]
            for _ in range(self.num_res_blocks):
                blocks.append(ResBlock(block_in_ch, block_out_ch))
                block_in_ch = block_out_ch
                if curr_res in attn_resolutions:
                    blocks.append(AttnBlock(block_in_ch))

            if i != self.num_resolutions - 1:
                blocks.append(Downsample(block_in_ch))
                curr_res = curr_res // 2

        # non-local attention block
        blocks.append(ResBlock(block_in_ch, block_in_ch))
        blocks.append(AttnBlock(block_in_ch))
        blocks.append(ResBlock(block_in_ch, block_in_ch))

        # normalise and convert to latent size
        blocks.append(normalize(block_in_ch))
        blocks.append(nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1))
        self.blocks = nn.ModuleList(blocks)

    def forward(self, x):
        for block in self.blocks:
            x = block(x)

        return x

class Generator(nn.Module):
    def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
        super().__init__()
        self.nf = nf
        self.ch_mult = ch_mult
        self.num_resolutions = len(self.ch_mult)
        self.num_res_blocks = res_blocks
        self.resolution = img_size
        self.attn_resolutions = attn_resolutions
        self.in_channels = emb_dim
        self.out_channels = 3
        block_in_ch = self.nf * self.ch_mult[-1]
        curr_res = self.resolution // 2 ** (self.num_resolutions-1)

        blocks = []
        # initial conv
        blocks.append(nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1))

        # non-local attention block
        blocks.append(ResBlock(block_in_ch, block_in_ch))
        blocks.append(AttnBlock(block_in_ch))
        blocks.append(ResBlock(block_in_ch, block_in_ch))

        for i in reversed(range(self.num_resolutions)):
            block_out_ch = self.nf * self.ch_mult[i]

            for _ in range(self.num_res_blocks):
                blocks.append(ResBlock(block_in_ch, block_out_ch))
                block_in_ch = block_out_ch

                if curr_res in self.attn_resolutions:
                    blocks.append(AttnBlock(block_in_ch))

            if i != 0:
                blocks.append(Upsample(block_in_ch))
                curr_res = curr_res * 2

        blocks.append(normalize(block_in_ch))
        blocks.append(nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1))

        self.blocks = nn.ModuleList(blocks)


    def forward(self, x):
        for block in self.blocks:
            x = block(x)

        return x

class VQAutoEncoder(nn.Module):
    def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
                 beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
        super().__init__()
        logger = get_root_logger()
        self.in_channels = 3
        self.nf = nf
        self.n_blocks = res_blocks
        self.codebook_size = codebook_size
        self.embed_dim = emb_dim
        self.ch_mult = ch_mult
        self.resolution = img_size
        self.attn_resolutions = attn_resolutions
        self.quantizer_type = quantizer
        self.encoder = Encoder(
            self.in_channels,
            self.nf,
            self.embed_dim,
            self.ch_mult,
            self.n_blocks,
            self.resolution,
            self.attn_resolutions
        )
        if self.quantizer_type == "nearest":
            self.beta = beta #0.25
            self.quantize = VectorQuantizer(self.codebook_size, self.embed_dim, self.beta)
        elif self.quantizer_type == "gumbel":
            self.gumbel_num_hiddens = emb_dim
            self.straight_through = gumbel_straight_through
            self.kl_weight = gumbel_kl_weight
            self.quantize = GumbelQuantizer(
                self.codebook_size,
                self.embed_dim,
                self.gumbel_num_hiddens,
                self.straight_through,
                self.kl_weight
            )
        self.generator = Generator(
            self.nf,
            self.embed_dim,
            self.ch_mult,
            self.n_blocks,
            self.resolution,
            self.attn_resolutions
        )

        if model_path is not None:
            chkpt = torch.load(model_path, map_location='cpu')
            if 'params_ema' in chkpt:
                self.load_state_dict(torch.load(model_path, map_location='cpu')['params_ema'])
                logger.info(f'vqgan is loaded from: {model_path} [params_ema]')
            elif 'params' in chkpt:
                self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
                logger.info(f'vqgan is loaded from: {model_path} [params]')
            else:
                raise ValueError(f'Wrong params!')


    def forward(self, x):
        x = self.encoder(x)
        quant, codebook_loss, quant_stats = self.quantize(x)
        x = self.generator(quant)
        return x, codebook_loss, quant_stats

# patch based discriminator
class VQGANDiscriminator(nn.Module):
    def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None):
        super().__init__()

        layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
        ndf_mult = 1
        ndf_mult_prev = 1
        for n in range(1, n_layers):  # gradually increase the number of filters
            ndf_mult_prev = ndf_mult
            ndf_mult = min(2 ** n, 8)
            layers += [
                nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False),
                nn.BatchNorm2d(ndf * ndf_mult),
                nn.LeakyReLU(0.2, True)
            ]

        ndf_mult_prev = ndf_mult
        ndf_mult = min(2 ** n_layers, 8)

        layers += [
            nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(ndf * ndf_mult),
            nn.LeakyReLU(0.2, True)
        ]

        layers += [
            nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)]  # output 1 channel prediction map
        self.main = nn.Sequential(*layers)

        if model_path is not None:
            chkpt = torch.load(model_path, map_location='cpu')
            if 'params_d' in chkpt:
                self.load_state_dict(torch.load(model_path, map_location='cpu')['params_d'])
            elif 'params' in chkpt:
                self.load_state_dict(torch.load(model_path, map_location='cpu')['params'])
            else:
                raise ValueError(f'Wrong params!')

    def forward(self, x):
        return self.main(x)
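A minimal sketch of how the VQ autoencoder defined above fits together (illustrative, not part of the commit). It uses the same hyperparameters CodeFormerArch passes to its VQAutoEncoder base and runs a random image through the encode-quantize-decode path; the input is a dummy tensor and requires basicsr to be installed for get_root_logger.

import torch
from SR_Inference.codeformer.vqgan_arch import VQAutoEncoder

# same settings CodeFormerArch passes to its VQAutoEncoder base class
vqgan = VQAutoEncoder(img_size=512, nf=64, ch_mult=[1, 2, 2, 4, 4, 8],
                      quantizer='nearest', res_blocks=2, attn_resolutions=[16],
                      codebook_size=1024)
vqgan.eval()

with torch.no_grad():
    img = torch.randn(1, 3, 512, 512)            # dummy RGB input
    recon, codebook_loss, stats = vqgan(img)     # encode -> quantize -> decode
    print(recon.shape, stats['min_encoding_indices'].shape)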
SR_Inference/codeformer/weights/codeformer_v0.1.0.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7
size 376637898
SR_Inference/gfpgan/weights/GFPGANv1.3.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c953a88f2727c85c3d9ae72e2bd4846bbaf59fe6972ad94130e23e7017524a70
size 348632874
SR_Inference/gfpgan/weights/detection_Resnet50_Final.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
size 109497761
SR_Inference/gfpgan/weights/parsing_parsenet.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
size 85331193
SR_Inference/hat/hat_arch.py
ADDED
@@ -0,0 +1,979 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import torch
|
3 |
+
import torch.nn as nn
|
4 |
+
from einops import rearrange
|
5 |
+
from basicsr.archs.arch_util import to_2tuple, trunc_normal_
|
6 |
+
|
7 |
+
def drop_path(x, drop_prob: float = 0., training: bool = False):
|
8 |
+
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
|
9 |
+
|
10 |
+
From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
|
11 |
+
"""
|
12 |
+
if drop_prob == 0. or not training:
|
13 |
+
return x
|
14 |
+
keep_prob = 1 - drop_prob
|
15 |
+
shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
|
16 |
+
random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
|
17 |
+
random_tensor.floor_() # binarize
|
18 |
+
output = x.div(keep_prob) * random_tensor
|
19 |
+
return output
|
20 |
+
|
21 |
+
|
22 |
+
class DropPath(nn.Module):
|
23 |
+
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
|
24 |
+
|
25 |
+
From: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/layers/drop.py
|
26 |
+
"""
|
27 |
+
|
28 |
+
def __init__(self, drop_prob=None):
|
29 |
+
super(DropPath, self).__init__()
|
30 |
+
self.drop_prob = drop_prob
|
31 |
+
|
32 |
+
def forward(self, x):
|
33 |
+
return drop_path(x, self.drop_prob, self.training)
|
34 |
+
|
35 |
+
|
36 |
+
class ChannelAttention(nn.Module):
|
37 |
+
"""Channel attention used in RCAN.
|
38 |
+
Args:
|
39 |
+
num_feat (int): Channel number of intermediate features.
|
40 |
+
squeeze_factor (int): Channel squeeze factor. Default: 16.
|
41 |
+
"""
|
42 |
+
|
43 |
+
def __init__(self, num_feat, squeeze_factor=16):
|
44 |
+
super(ChannelAttention, self).__init__()
|
45 |
+
self.attention = nn.Sequential(
|
46 |
+
nn.AdaptiveAvgPool2d(1),
|
47 |
+
nn.Conv2d(num_feat, num_feat // squeeze_factor, 1, padding=0),
|
48 |
+
nn.ReLU(inplace=True),
|
49 |
+
nn.Conv2d(num_feat // squeeze_factor, num_feat, 1, padding=0),
|
50 |
+
nn.Sigmoid())
|
51 |
+
|
52 |
+
def forward(self, x):
|
53 |
+
y = self.attention(x)
|
54 |
+
return x * y
|
55 |
+
|
56 |
+
|
57 |
+
class CAB(nn.Module):
|
58 |
+
|
59 |
+
def __init__(self, num_feat, compress_ratio=3, squeeze_factor=30):
|
60 |
+
super(CAB, self).__init__()
|
61 |
+
|
62 |
+
self.cab = nn.Sequential(
|
63 |
+
nn.Conv2d(num_feat, num_feat // compress_ratio, 3, 1, 1),
|
64 |
+
nn.GELU(),
|
65 |
+
nn.Conv2d(num_feat // compress_ratio, num_feat, 3, 1, 1),
|
66 |
+
ChannelAttention(num_feat, squeeze_factor)
|
67 |
+
)
|
68 |
+
|
69 |
+
def forward(self, x):
|
70 |
+
return self.cab(x)
|
71 |
+
|
72 |
+
|
73 |
+
class Mlp(nn.Module):
|
74 |
+
|
75 |
+
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
|
76 |
+
super().__init__()
|
77 |
+
out_features = out_features or in_features
|
78 |
+
hidden_features = hidden_features or in_features
|
79 |
+
self.fc1 = nn.Linear(in_features, hidden_features)
|
80 |
+
self.act = act_layer()
|
81 |
+
self.fc2 = nn.Linear(hidden_features, out_features)
|
82 |
+
self.drop = nn.Dropout(drop)
|
83 |
+
|
84 |
+
def forward(self, x):
|
85 |
+
x = self.fc1(x)
|
86 |
+
x = self.act(x)
|
87 |
+
x = self.drop(x)
|
88 |
+
x = self.fc2(x)
|
89 |
+
x = self.drop(x)
|
90 |
+
return x
|
91 |
+
|
92 |
+
|
93 |
+
def window_partition(x, window_size):
|
94 |
+
"""
|
95 |
+
Args:
|
96 |
+
x: (b, h, w, c)
|
97 |
+
window_size (int): window size
|
98 |
+
|
99 |
+
Returns:
|
100 |
+
windows: (num_windows*b, window_size, window_size, c)
|
101 |
+
"""
|
102 |
+
b, h, w, c = x.shape
|
103 |
+
x = x.view(b, h // window_size, window_size, w // window_size, window_size, c)
|
104 |
+
windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, c)
|
105 |
+
return windows
|
106 |
+
|
107 |
+
|
108 |
+
def window_reverse(windows, window_size, h, w):
|
109 |
+
"""
|
110 |
+
Args:
|
111 |
+
windows: (num_windows*b, window_size, window_size, c)
|
112 |
+
window_size (int): Window size
|
113 |
+
h (int): Height of image
|
114 |
+
w (int): Width of image
|
115 |
+
|
116 |
+
Returns:
|
117 |
+
x: (b, h, w, c)
|
118 |
+
"""
|
119 |
+
b = int(windows.shape[0] / (h * w / window_size / window_size))
|
120 |
+
x = windows.view(b, h // window_size, w // window_size, window_size, window_size, -1)
|
121 |
+
x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(b, h, w, -1)
|
122 |
+
return x
|
123 |
+
|
124 |
+
|
125 |
+
class WindowAttention(nn.Module):
|
126 |
+
r""" Window based multi-head self attention (W-MSA) module with relative position bias.
|
127 |
+
It supports both of shifted and non-shifted window.
|
128 |
+
|
129 |
+
Args:
|
130 |
+
dim (int): Number of input channels.
|
131 |
+
window_size (tuple[int]): The height and width of the window.
|
132 |
+
num_heads (int): Number of attention heads.
|
133 |
+
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
|
134 |
+
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
|
135 |
+
attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
|
136 |
+
proj_drop (float, optional): Dropout ratio of output. Default: 0.0
|
137 |
+
"""
|
138 |
+
|
139 |
+
def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
|
140 |
+
|
141 |
+
super().__init__()
|
142 |
+
self.dim = dim
|
143 |
+
self.window_size = window_size # Wh, Ww
|
144 |
+
self.num_heads = num_heads
|
145 |
+
head_dim = dim // num_heads
|
146 |
+
self.scale = qk_scale or head_dim**-0.5
|
147 |
+
|
148 |
+
# define a parameter table of relative position bias
|
149 |
+
self.relative_position_bias_table = nn.Parameter(
|
150 |
+
torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH
|
151 |
+
|
152 |
+
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
|
153 |
+
self.attn_drop = nn.Dropout(attn_drop)
|
154 |
+
self.proj = nn.Linear(dim, dim)
|
155 |
+
|
156 |
+
self.proj_drop = nn.Dropout(proj_drop)
|
157 |
+
|
158 |
+
trunc_normal_(self.relative_position_bias_table, std=.02)
|
159 |
+
self.softmax = nn.Softmax(dim=-1)
|
160 |
+
|
161 |
+
def forward(self, x, rpi, mask=None):
|
162 |
+
"""
|
163 |
+
Args:
|
164 |
+
x: input features with shape of (num_windows*b, n, c)
|
165 |
+
mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
|
166 |
+
"""
|
167 |
+
b_, n, c = x.shape
|
168 |
+
qkv = self.qkv(x).reshape(b_, n, 3, self.num_heads, c // self.num_heads).permute(2, 0, 3, 1, 4)
|
169 |
+
q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
|
170 |
+
|
171 |
+
q = q * self.scale
|
172 |
+
attn = (q @ k.transpose(-2, -1))
|
173 |
+
|
174 |
+
relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
|
175 |
+
self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
|
176 |
+
relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
|
177 |
+
attn = attn + relative_position_bias.unsqueeze(0)
|
178 |
+
|
179 |
+
if mask is not None:
|
180 |
+
nw = mask.shape[0]
|
181 |
+
attn = attn.view(b_ // nw, nw, self.num_heads, n, n) + mask.unsqueeze(1).unsqueeze(0)
|
182 |
+
attn = attn.view(-1, self.num_heads, n, n)
|
183 |
+
attn = self.softmax(attn)
|
184 |
+
else:
|
185 |
+
attn = self.softmax(attn)
|
186 |
+
|
187 |
+
attn = self.attn_drop(attn)
|
188 |
+
|
189 |
+
x = (attn @ v).transpose(1, 2).reshape(b_, n, c)
|
190 |
+
x = self.proj(x)
|
191 |
+
x = self.proj_drop(x)
|
192 |
+
return x
|
193 |
+
|
194 |
+
|
195 |
+
class HAB(nn.Module):
|
196 |
+
r""" Hybrid Attention Block.
|
197 |
+
|
198 |
+
Args:
|
199 |
+
dim (int): Number of input channels.
|
200 |
+
input_resolution (tuple[int]): Input resolution.
|
201 |
+
num_heads (int): Number of attention heads.
|
202 |
+
window_size (int): Window size.
|
203 |
+
shift_size (int): Shift size for SW-MSA.
|
204 |
+
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
|
205 |
+
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
|
206 |
+
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
|
207 |
+
drop (float, optional): Dropout rate. Default: 0.0
|
208 |
+
attn_drop (float, optional): Attention dropout rate. Default: 0.0
|
209 |
+
drop_path (float, optional): Stochastic depth rate. Default: 0.0
|
210 |
+
act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
|
211 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
|
212 |
+
"""
|
213 |
+
|
214 |
+
def __init__(self,
|
215 |
+
dim,
|
216 |
+
input_resolution,
|
217 |
+
num_heads,
|
218 |
+
window_size=7,
|
219 |
+
shift_size=0,
|
220 |
+
compress_ratio=3,
|
221 |
+
squeeze_factor=30,
|
222 |
+
conv_scale=0.01,
|
223 |
+
mlp_ratio=4.,
|
224 |
+
qkv_bias=True,
|
225 |
+
qk_scale=None,
|
226 |
+
drop=0.,
|
227 |
+
attn_drop=0.,
|
228 |
+
drop_path=0.,
|
229 |
+
act_layer=nn.GELU,
|
230 |
+
norm_layer=nn.LayerNorm):
|
231 |
+
super().__init__()
|
232 |
+
self.dim = dim
|
233 |
+
self.input_resolution = input_resolution
|
234 |
+
self.num_heads = num_heads
|
235 |
+
self.window_size = window_size
|
236 |
+
self.shift_size = shift_size
|
237 |
+
self.mlp_ratio = mlp_ratio
|
238 |
+
if min(self.input_resolution) <= self.window_size:
|
239 |
+
# if window size is larger than input resolution, we don't partition windows
|
240 |
+
self.shift_size = 0
|
241 |
+
self.window_size = min(self.input_resolution)
|
242 |
+
assert 0 <= self.shift_size < self.window_size, 'shift_size must in 0-window_size'
|
243 |
+
|
244 |
+
self.norm1 = norm_layer(dim)
|
245 |
+
self.attn = WindowAttention(
|
246 |
+
dim,
|
247 |
+
window_size=to_2tuple(self.window_size),
|
248 |
+
num_heads=num_heads,
|
249 |
+
qkv_bias=qkv_bias,
|
250 |
+
qk_scale=qk_scale,
|
251 |
+
attn_drop=attn_drop,
|
252 |
+
proj_drop=drop)
|
253 |
+
|
254 |
+
self.conv_scale = conv_scale
|
255 |
+
self.conv_block = CAB(num_feat=dim, compress_ratio=compress_ratio, squeeze_factor=squeeze_factor)
|
256 |
+
|
257 |
+
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
|
258 |
+
self.norm2 = norm_layer(dim)
|
259 |
+
mlp_hidden_dim = int(dim * mlp_ratio)
|
260 |
+
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
|
261 |
+
|
262 |
+
def forward(self, x, x_size, rpi_sa, attn_mask):
|
263 |
+
h, w = x_size
|
264 |
+
b, _, c = x.shape
|
265 |
+
# assert seq_len == h * w, "input feature has wrong size"
|
266 |
+
|
267 |
+
shortcut = x
|
268 |
+
x = self.norm1(x)
|
269 |
+
x = x.view(b, h, w, c)
|
270 |
+
|
271 |
+
# Conv_X
|
272 |
+
conv_x = self.conv_block(x.permute(0, 3, 1, 2))
|
273 |
+
conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(b, h * w, c)
|
274 |
+
|
275 |
+
# cyclic shift
|
276 |
+
if self.shift_size > 0:
|
277 |
+
shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
|
278 |
+
attn_mask = attn_mask
|
279 |
+
else:
|
280 |
+
shifted_x = x
|
281 |
+
attn_mask = None
|
282 |
+
|
283 |
+
# partition windows
|
284 |
+
x_windows = window_partition(shifted_x, self.window_size) # nw*b, window_size, window_size, c
|
285 |
+
x_windows = x_windows.view(-1, self.window_size * self.window_size, c) # nw*b, window_size*window_size, c
|
286 |
+
|
287 |
+
# W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
|
288 |
+
attn_windows = self.attn(x_windows, rpi=rpi_sa, mask=attn_mask)
|
289 |
+
|
290 |
+
# merge windows
|
291 |
+
attn_windows = attn_windows.view(-1, self.window_size, self.window_size, c)
|
292 |
+
shifted_x = window_reverse(attn_windows, self.window_size, h, w) # b h' w' c
|
293 |
+
|
294 |
+
# reverse cyclic shift
|
295 |
+
if self.shift_size > 0:
|
296 |
+
attn_x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
|
297 |
+
else:
|
298 |
+
attn_x = shifted_x
|
299 |
+
attn_x = attn_x.view(b, h * w, c)
|
300 |
+
|
301 |
+
# FFN
|
302 |
+
x = shortcut + self.drop_path(attn_x) + conv_x * self.conv_scale
|
303 |
+
x = x + self.drop_path(self.mlp(self.norm2(x)))
|
304 |
+
|
305 |
+
return x
|
306 |
+
|
307 |
+
|
308 |
+
class PatchMerging(nn.Module):
|
309 |
+
r""" Patch Merging Layer.
|
310 |
+
|
311 |
+
Args:
|
312 |
+
input_resolution (tuple[int]): Resolution of input feature.
|
313 |
+
dim (int): Number of input channels.
|
314 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
|
315 |
+
"""
|
316 |
+
|
317 |
+
def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
|
318 |
+
super().__init__()
|
319 |
+
self.input_resolution = input_resolution
|
320 |
+
self.dim = dim
|
321 |
+
self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
|
322 |
+
self.norm = norm_layer(4 * dim)
|
323 |
+
|
324 |
+
def forward(self, x):
|
325 |
+
"""
|
326 |
+
x: b, h*w, c
|
327 |
+
"""
|
328 |
+
h, w = self.input_resolution
|
329 |
+
b, seq_len, c = x.shape
|
330 |
+
assert seq_len == h * w, 'input feature has wrong size'
|
331 |
+
assert h % 2 == 0 and w % 2 == 0, f'x size ({h}*{w}) are not even.'
|
332 |
+
|
333 |
+
x = x.view(b, h, w, c)
|
334 |
+
|
335 |
+
x0 = x[:, 0::2, 0::2, :] # b h/2 w/2 c
|
336 |
+
x1 = x[:, 1::2, 0::2, :] # b h/2 w/2 c
|
337 |
+
x2 = x[:, 0::2, 1::2, :] # b h/2 w/2 c
|
338 |
+
x3 = x[:, 1::2, 1::2, :] # b h/2 w/2 c
|
339 |
+
x = torch.cat([x0, x1, x2, x3], -1) # b h/2 w/2 4*c
|
340 |
+
x = x.view(b, -1, 4 * c) # b h/2*w/2 4*c
|
341 |
+
|
342 |
+
x = self.norm(x)
|
343 |
+
x = self.reduction(x)
|
344 |
+
|
345 |
+
return x
|
346 |
+
|
347 |
+
|
348 |
+
class OCAB(nn.Module):
|
349 |
+
# overlapping cross-attention block
|
350 |
+
|
351 |
+
def __init__(self, dim,
|
352 |
+
input_resolution,
|
353 |
+
window_size,
|
354 |
+
overlap_ratio,
|
355 |
+
num_heads,
|
356 |
+
qkv_bias=True,
|
357 |
+
qk_scale=None,
|
358 |
+
mlp_ratio=2,
|
359 |
+
norm_layer=nn.LayerNorm
|
360 |
+
):
|
361 |
+
|
362 |
+
super().__init__()
|
363 |
+
self.dim = dim
|
364 |
+
self.input_resolution = input_resolution
|
365 |
+
self.window_size = window_size
|
366 |
+
self.num_heads = num_heads
|
367 |
+
head_dim = dim // num_heads
|
368 |
+
self.scale = qk_scale or head_dim**-0.5
|
369 |
+
self.overlap_win_size = int(window_size * overlap_ratio) + window_size
|
370 |
+
|
371 |
+
self.norm1 = norm_layer(dim)
|
372 |
+
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
|
373 |
+
self.unfold = nn.Unfold(kernel_size=(self.overlap_win_size, self.overlap_win_size), stride=window_size, padding=(self.overlap_win_size-window_size)//2)
|
374 |
+
|
375 |
+
# define a parameter table of relative position bias
|
376 |
+
self.relative_position_bias_table = nn.Parameter(
|
377 |
+
torch.zeros((window_size + self.overlap_win_size - 1) * (window_size + self.overlap_win_size - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH
|
378 |
+
|
379 |
+
trunc_normal_(self.relative_position_bias_table, std=.02)
|
380 |
+
self.softmax = nn.Softmax(dim=-1)
|
381 |
+
|
382 |
+
self.proj = nn.Linear(dim,dim)
|
383 |
+
|
384 |
+
self.norm2 = norm_layer(dim)
|
385 |
+
mlp_hidden_dim = int(dim * mlp_ratio)
|
386 |
+
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=nn.GELU)
|
387 |
+
|
388 |
+
def forward(self, x, x_size, rpi):
|
389 |
+
h, w = x_size
|
390 |
+
b, _, c = x.shape
|
391 |
+
|
392 |
+
shortcut = x
|
393 |
+
x = self.norm1(x)
|
394 |
+
x = x.view(b, h, w, c)
|
395 |
+
|
396 |
+
qkv = self.qkv(x).reshape(b, h, w, 3, c).permute(3, 0, 4, 1, 2) # 3, b, c, h, w
|
397 |
+
q = qkv[0].permute(0, 2, 3, 1) # b, h, w, c
|
398 |
+
kv = torch.cat((qkv[1], qkv[2]), dim=1) # b, 2*c, h, w
|
399 |
+
|
400 |
+
# partition windows
|
401 |
+
q_windows = window_partition(q, self.window_size) # nw*b, window_size, window_size, c
|
402 |
+
q_windows = q_windows.view(-1, self.window_size * self.window_size, c) # nw*b, window_size*window_size, c
|
403 |
+
|
404 |
+
kv_windows = self.unfold(kv) # b, c*w*w, nw
|
405 |
+
kv_windows = rearrange(kv_windows, 'b (nc ch owh oww) nw -> nc (b nw) (owh oww) ch', nc=2, ch=c, owh=self.overlap_win_size, oww=self.overlap_win_size).contiguous() # 2, nw*b, ow*ow, c
|
406 |
+
k_windows, v_windows = kv_windows[0], kv_windows[1] # nw*b, ow*ow, c
|
407 |
+
|
408 |
+
b_, nq, _ = q_windows.shape
|
409 |
+
_, n, _ = k_windows.shape
|
410 |
+
d = self.dim // self.num_heads
|
411 |
+
q = q_windows.reshape(b_, nq, self.num_heads, d).permute(0, 2, 1, 3) # nw*b, nH, nq, d
|
412 |
+
k = k_windows.reshape(b_, n, self.num_heads, d).permute(0, 2, 1, 3) # nw*b, nH, n, d
|
413 |
+
v = v_windows.reshape(b_, n, self.num_heads, d).permute(0, 2, 1, 3) # nw*b, nH, n, d
|
414 |
+
|
415 |
+
q = q * self.scale
|
416 |
+
attn = (q @ k.transpose(-2, -1))
|
417 |
+
|
418 |
+
relative_position_bias = self.relative_position_bias_table[rpi.view(-1)].view(
|
419 |
+
self.window_size * self.window_size, self.overlap_win_size * self.overlap_win_size, -1) # ws*ws, wse*wse, nH
|
420 |
+
relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, ws*ws, wse*wse
|
421 |
+
attn = attn + relative_position_bias.unsqueeze(0)
|
422 |
+
|
423 |
+
attn = self.softmax(attn)
|
424 |
+
attn_windows = (attn @ v).transpose(1, 2).reshape(b_, nq, self.dim)
|
425 |
+
|
426 |
+
# merge windows
|
427 |
+
attn_windows = attn_windows.view(-1, self.window_size, self.window_size, self.dim)
|
428 |
+
x = window_reverse(attn_windows, self.window_size, h, w) # b h w c
|
429 |
+
x = x.view(b, h * w, self.dim)
|
430 |
+
|
431 |
+
x = self.proj(x) + shortcut
|
432 |
+
|
433 |
+
x = x + self.mlp(self.norm2(x))
|
434 |
+
return x
|
435 |
+
|
436 |
+
|
437 |
+
class AttenBlocks(nn.Module):
|
438 |
+
""" A series of attention blocks for one RHAG.
|
439 |
+
|
440 |
+
Args:
|
441 |
+
dim (int): Number of input channels.
|
442 |
+
input_resolution (tuple[int]): Input resolution.
|
443 |
+
depth (int): Number of blocks.
|
444 |
+
num_heads (int): Number of attention heads.
|
445 |
+
window_size (int): Local window size.
|
446 |
+
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
|
447 |
+
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
|
448 |
+
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
|
449 |
+
drop (float, optional): Dropout rate. Default: 0.0
|
450 |
+
attn_drop (float, optional): Attention dropout rate. Default: 0.0
|
451 |
+
drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
|
452 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
|
453 |
+
downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
|
454 |
+
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
|
455 |
+
"""
|
456 |
+
|
457 |
+
def __init__(self,
|
458 |
+
dim,
|
459 |
+
input_resolution,
|
460 |
+
depth,
|
461 |
+
num_heads,
|
462 |
+
window_size,
|
463 |
+
compress_ratio,
|
464 |
+
squeeze_factor,
|
465 |
+
conv_scale,
|
466 |
+
overlap_ratio,
|
467 |
+
mlp_ratio=4.,
|
468 |
+
qkv_bias=True,
|
469 |
+
qk_scale=None,
|
470 |
+
drop=0.,
|
471 |
+
attn_drop=0.,
|
472 |
+
drop_path=0.,
|
473 |
+
norm_layer=nn.LayerNorm,
|
474 |
+
downsample=None,
|
475 |
+
use_checkpoint=False):
|
476 |
+
|
477 |
+
super().__init__()
|
478 |
+
self.dim = dim
|
479 |
+
self.input_resolution = input_resolution
|
480 |
+
self.depth = depth
|
481 |
+
self.use_checkpoint = use_checkpoint
|
482 |
+
|
483 |
+
# build blocks
|
484 |
+
self.blocks = nn.ModuleList([
|
485 |
+
HAB(
|
486 |
+
dim=dim,
|
487 |
+
input_resolution=input_resolution,
|
488 |
+
num_heads=num_heads,
|
489 |
+
window_size=window_size,
|
490 |
+
shift_size=0 if (i % 2 == 0) else window_size // 2,
|
491 |
+
compress_ratio=compress_ratio,
|
492 |
+
squeeze_factor=squeeze_factor,
|
493 |
+
conv_scale=conv_scale,
|
494 |
+
mlp_ratio=mlp_ratio,
|
495 |
+
qkv_bias=qkv_bias,
|
496 |
+
qk_scale=qk_scale,
|
497 |
+
drop=drop,
|
498 |
+
attn_drop=attn_drop,
|
499 |
+
drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
|
500 |
+
norm_layer=norm_layer) for i in range(depth)
|
501 |
+
])
|
502 |
+
|
503 |
+
# OCAB
|
504 |
+
self.overlap_attn = OCAB(
|
505 |
+
dim=dim,
|
506 |
+
input_resolution=input_resolution,
|
507 |
+
window_size=window_size,
|
508 |
+
overlap_ratio=overlap_ratio,
|
509 |
+
num_heads=num_heads,
|
510 |
+
qkv_bias=qkv_bias,
|
511 |
+
qk_scale=qk_scale,
|
512 |
+
mlp_ratio=mlp_ratio,
|
513 |
+
norm_layer=norm_layer
|
514 |
+
)
|
515 |
+
|
516 |
+
# patch merging layer
|
517 |
+
if downsample is not None:
|
518 |
+
self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
|
519 |
+
else:
|
520 |
+
self.downsample = None
|
521 |
+
|
522 |
+
def forward(self, x, x_size, params):
|
523 |
+
for blk in self.blocks:
|
524 |
+
x = blk(x, x_size, params['rpi_sa'], params['attn_mask'])
|
525 |
+
|
526 |
+
x = self.overlap_attn(x, x_size, params['rpi_oca'])
|
527 |
+
|
528 |
+
if self.downsample is not None:
|
529 |
+
x = self.downsample(x)
|
530 |
+
return x
|
531 |
+
|
532 |
+
|
533 |
+
class RHAG(nn.Module):
|
534 |
+
"""Residual Hybrid Attention Group (RHAG).
|
535 |
+
|
536 |
+
Args:
|
537 |
+
dim (int): Number of input channels.
|
538 |
+
input_resolution (tuple[int]): Input resolution.
|
539 |
+
depth (int): Number of blocks.
|
540 |
+
num_heads (int): Number of attention heads.
|
541 |
+
window_size (int): Local window size.
|
542 |
+
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
|
543 |
+
qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
|
544 |
+
qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
|
545 |
+
drop (float, optional): Dropout rate. Default: 0.0
|
546 |
+
attn_drop (float, optional): Attention dropout rate. Default: 0.0
|
547 |
+
drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
|
548 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
|
549 |
+
downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
|
550 |
+
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
|
551 |
+
img_size: Input image size.
|
552 |
+
patch_size: Patch size.
|
553 |
+
resi_connection: The convolutional block before residual connection.
|
554 |
+
"""
|
555 |
+
|
556 |
+
def __init__(self,
|
557 |
+
dim,
|
558 |
+
input_resolution,
|
559 |
+
depth,
|
560 |
+
num_heads,
|
561 |
+
window_size,
|
562 |
+
compress_ratio,
|
563 |
+
squeeze_factor,
|
564 |
+
conv_scale,
|
565 |
+
overlap_ratio,
|
566 |
+
mlp_ratio=4.,
|
567 |
+
qkv_bias=True,
|
568 |
+
qk_scale=None,
|
569 |
+
drop=0.,
|
570 |
+
attn_drop=0.,
|
571 |
+
drop_path=0.,
|
572 |
+
norm_layer=nn.LayerNorm,
|
573 |
+
downsample=None,
|
574 |
+
use_checkpoint=False,
|
575 |
+
img_size=224,
|
576 |
+
patch_size=4,
|
577 |
+
resi_connection='1conv'):
|
578 |
+
super(RHAG, self).__init__()
|
579 |
+
|
580 |
+
self.dim = dim
|
581 |
+
self.input_resolution = input_resolution
|
582 |
+
|
583 |
+
self.residual_group = AttenBlocks(
|
584 |
+
dim=dim,
|
585 |
+
input_resolution=input_resolution,
|
586 |
+
depth=depth,
|
587 |
+
num_heads=num_heads,
|
588 |
+
window_size=window_size,
|
589 |
+
compress_ratio=compress_ratio,
|
590 |
+
squeeze_factor=squeeze_factor,
|
591 |
+
conv_scale=conv_scale,
|
592 |
+
overlap_ratio=overlap_ratio,
|
593 |
+
mlp_ratio=mlp_ratio,
|
594 |
+
qkv_bias=qkv_bias,
|
595 |
+
qk_scale=qk_scale,
|
596 |
+
drop=drop,
|
597 |
+
attn_drop=attn_drop,
|
598 |
+
drop_path=drop_path,
|
599 |
+
norm_layer=norm_layer,
|
600 |
+
downsample=downsample,
|
601 |
+
use_checkpoint=use_checkpoint)
|
602 |
+
|
603 |
+
if resi_connection == '1conv':
|
604 |
+
self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
|
605 |
+
elif resi_connection == 'identity':
|
606 |
+
self.conv = nn.Identity()
|
607 |
+
|
608 |
+
self.patch_embed = PatchEmbed(
|
609 |
+
img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, norm_layer=None)
|
610 |
+
|
611 |
+
self.patch_unembed = PatchUnEmbed(
|
612 |
+
img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim, norm_layer=None)
|
613 |
+
|
614 |
+
def forward(self, x, x_size, params):
|
615 |
+
return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size, params), x_size))) + x
|
616 |
+
|
617 |
+
|
618 |
+
class PatchEmbed(nn.Module):
|
619 |
+
r""" Image to Patch Embedding
|
620 |
+
|
621 |
+
Args:
|
622 |
+
img_size (int): Image size. Default: 224.
|
623 |
+
patch_size (int): Patch token size. Default: 4.
|
624 |
+
in_chans (int): Number of input image channels. Default: 3.
|
625 |
+
embed_dim (int): Number of linear projection output channels. Default: 96.
|
626 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: None
|
627 |
+
"""
|
628 |
+
|
629 |
+
def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
|
630 |
+
super().__init__()
|
631 |
+
img_size = to_2tuple(img_size)
|
632 |
+
patch_size = to_2tuple(patch_size)
|
633 |
+
patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
|
634 |
+
self.img_size = img_size
|
635 |
+
self.patch_size = patch_size
|
636 |
+
self.patches_resolution = patches_resolution
|
637 |
+
self.num_patches = patches_resolution[0] * patches_resolution[1]
|
638 |
+
|
639 |
+
self.in_chans = in_chans
|
640 |
+
self.embed_dim = embed_dim
|
641 |
+
|
642 |
+
if norm_layer is not None:
|
643 |
+
self.norm = norm_layer(embed_dim)
|
644 |
+
else:
|
645 |
+
self.norm = None
|
646 |
+
|
647 |
+
def forward(self, x):
|
648 |
+
x = x.flatten(2).transpose(1, 2) # b Ph*Pw c
|
649 |
+
if self.norm is not None:
|
650 |
+
x = self.norm(x)
|
651 |
+
return x
|
652 |
+
|
653 |
+
|
654 |
+
class PatchUnEmbed(nn.Module):
|
655 |
+
r""" Image to Patch Unembedding
|
656 |
+
|
657 |
+
Args:
|
658 |
+
img_size (int): Image size. Default: 224.
|
659 |
+
patch_size (int): Patch token size. Default: 4.
|
660 |
+
in_chans (int): Number of input image channels. Default: 3.
|
661 |
+
embed_dim (int): Number of linear projection output channels. Default: 96.
|
662 |
+
norm_layer (nn.Module, optional): Normalization layer. Default: None
|
663 |
+
"""
|
664 |
+
|
665 |
+
def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
|
666 |
+
super().__init__()
|
667 |
+
img_size = to_2tuple(img_size)
|
668 |
+
patch_size = to_2tuple(patch_size)
|
669 |
+
patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
|
670 |
+
self.img_size = img_size
|
671 |
+
self.patch_size = patch_size
|
672 |
+
self.patches_resolution = patches_resolution
|
673 |
+
self.num_patches = patches_resolution[0] * patches_resolution[1]
|
674 |
+
|
675 |
+
self.in_chans = in_chans
|
676 |
+
self.embed_dim = embed_dim
|
677 |
+
|
678 |
+
def forward(self, x, x_size):
|
679 |
+
x = x.transpose(1, 2).contiguous().view(x.shape[0], self.embed_dim, x_size[0], x_size[1]) # b Ph*Pw c
|
680 |
+
return x
|
681 |
+
|
682 |
+
|
683 |
+
class Upsample(nn.Sequential):
|
684 |
+
"""Upsample module.
|
685 |
+
|
686 |
+
Args:
|
687 |
+
scale (int): Scale factor. Supported scales: 2^n and 3.
|
688 |
+
num_feat (int): Channel number of intermediate features.
|
689 |
+
"""
|
690 |
+
|
691 |
+
def __init__(self, scale, num_feat):
|
692 |
+
m = []
|
693 |
+
if (scale & (scale - 1)) == 0: # scale = 2^n
|
694 |
+
for _ in range(int(math.log(scale, 2))):
|
695 |
+
m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
|
696 |
+
m.append(nn.PixelShuffle(2))
|
697 |
+
elif scale == 3:
|
698 |
+
m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
|
699 |
+
m.append(nn.PixelShuffle(3))
|
700 |
+
else:
|
701 |
+
raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
|
702 |
+
super(Upsample, self).__init__(*m)
|
703 |
+
|
704 |
+
|
705 |
+
class HATArch(nn.Module):
|
706 |
+
r""" Hybrid Attention Transformer
|
707 |
+
A PyTorch implementation of: `Activating More Pixels in Image Super-Resolution Transformer`.
|
708 |
+
Some codes are based on SwinIR.
|
709 |
+
Args:
|
710 |
+
img_size (int | tuple(int)): Input image size. Default 64
|
711 |
+
patch_size (int | tuple(int)): Patch size. Default: 1
|
712 |
+
in_chans (int): Number of input image channels. Default: 3
|
713 |
+
embed_dim (int): Patch embedding dimension. Default: 96
|
714 |
+
depths (tuple(int)): Depth of each Swin Transformer layer.
|
715 |
+
num_heads (tuple(int)): Number of attention heads in different layers.
|
716 |
+
window_size (int): Window size. Default: 7
|
717 |
+
mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
|
718 |
+
qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
|
719 |
+
qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
|
720 |
+
drop_rate (float): Dropout rate. Default: 0
|
721 |
+
attn_drop_rate (float): Attention dropout rate. Default: 0
|
722 |
+
drop_path_rate (float): Stochastic depth rate. Default: 0.1
|
723 |
+
norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
|
724 |
+
ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
|
725 |
+
patch_norm (bool): If True, add normalization after patch embedding. Default: True
|
726 |
+
use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
|
727 |
+
upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
|
728 |
+
img_range: Image range. 1. or 255.
|
729 |
+
upsampler: The reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
|
730 |
+
resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
|
731 |
+
"""
|
732 |
+
|
733 |
+
def __init__(self,
|
734 |
+
img_size=64,
|
735 |
+
patch_size=1,
|
736 |
+
in_chans=3,
|
737 |
+
embed_dim=96,
|
738 |
+
depths=(6, 6, 6, 6),
|
739 |
+
num_heads=(6, 6, 6, 6),
|
740 |
+
window_size=7,
|
741 |
+
compress_ratio=3,
|
742 |
+
squeeze_factor=30,
|
743 |
+
conv_scale=0.01,
|
744 |
+
overlap_ratio=0.5,
|
745 |
+
mlp_ratio=4.,
|
746 |
+
qkv_bias=True,
|
747 |
+
qk_scale=None,
|
748 |
+
drop_rate=0.,
|
749 |
+
attn_drop_rate=0.,
|
750 |
+
drop_path_rate=0.1,
|
751 |
+
norm_layer=nn.LayerNorm,
|
752 |
+
ape=False,
|
753 |
+
patch_norm=True,
|
754 |
+
use_checkpoint=False,
|
755 |
+
upscale=2,
|
756 |
+
img_range=1.,
|
757 |
+
upsampler='',
|
758 |
+
resi_connection='1conv',
|
759 |
+
**kwargs):
|
760 |
+
super(HATArch, self).__init__()
|
761 |
+
|
762 |
+
self.window_size = window_size
|
763 |
+
self.shift_size = window_size // 2
|
764 |
+
self.overlap_ratio = overlap_ratio
|
765 |
+
|
766 |
+
num_in_ch = in_chans
|
767 |
+
num_out_ch = in_chans
|
768 |
+
num_feat = 64
|
769 |
+
self.img_range = img_range
|
770 |
+
if in_chans == 3:
|
771 |
+
rgb_mean = (0.4488, 0.4371, 0.4040)
|
772 |
+
self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
|
773 |
+
else:
|
774 |
+
self.mean = torch.zeros(1, 1, 1, 1)
|
775 |
+
self.upscale = upscale
|
776 |
+
self.upsampler = upsampler
|
777 |
+
|
778 |
+
# relative position index
|
779 |
+
relative_position_index_SA = self.calculate_rpi_sa()
|
780 |
+
relative_position_index_OCA = self.calculate_rpi_oca()
|
781 |
+
self.register_buffer('relative_position_index_SA', relative_position_index_SA)
|
782 |
+
self.register_buffer('relative_position_index_OCA', relative_position_index_OCA)
|
783 |
+
|
784 |
+
# ------------------------- 1, shallow feature extraction ------------------------- #
|
785 |
+
self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
|
786 |
+
|
787 |
+
# ------------------------- 2, deep feature extraction ------------------------- #
|
788 |
+
self.num_layers = len(depths)
|
789 |
+
self.embed_dim = embed_dim
|
790 |
+
self.ape = ape
|
791 |
+
self.patch_norm = patch_norm
|
792 |
+
self.num_features = embed_dim
|
793 |
+
self.mlp_ratio = mlp_ratio
|
794 |
+
|
795 |
+
# split image into non-overlapping patches
|
796 |
+
self.patch_embed = PatchEmbed(
|
797 |
+
img_size=img_size,
|
798 |
+
patch_size=patch_size,
|
799 |
+
in_chans=embed_dim,
|
800 |
+
embed_dim=embed_dim,
|
801 |
+
norm_layer=norm_layer if self.patch_norm else None)
|
802 |
+
num_patches = self.patch_embed.num_patches
|
803 |
+
patches_resolution = self.patch_embed.patches_resolution
|
804 |
+
self.patches_resolution = patches_resolution
|
805 |
+
|
806 |
+
# merge non-overlapping patches into image
|
807 |
+
self.patch_unembed = PatchUnEmbed(
|
808 |
+
img_size=img_size,
|
809 |
+
patch_size=patch_size,
|
810 |
+
in_chans=embed_dim,
|
811 |
+
embed_dim=embed_dim,
|
812 |
+
norm_layer=norm_layer if self.patch_norm else None)
|
813 |
+
|
814 |
+
# absolute position embedding
|
815 |
+
if self.ape:
|
816 |
+
self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
|
817 |
+
trunc_normal_(self.absolute_pos_embed, std=.02)
|
818 |
+
|
819 |
+
self.pos_drop = nn.Dropout(p=drop_rate)
|
820 |
+
|
821 |
+
# stochastic depth
|
822 |
+
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
|
823 |
+
|
824 |
+
# build Residual Hybrid Attention Groups (RHAG)
|
825 |
+
self.layers = nn.ModuleList()
|
826 |
+
for i_layer in range(self.num_layers):
|
827 |
+
layer = RHAG(
|
828 |
+
dim=embed_dim,
|
829 |
+
input_resolution=(patches_resolution[0], patches_resolution[1]),
|
830 |
+
depth=depths[i_layer],
|
831 |
+
num_heads=num_heads[i_layer],
|
832 |
+
window_size=window_size,
|
833 |
+
compress_ratio=compress_ratio,
|
834 |
+
squeeze_factor=squeeze_factor,
|
835 |
+
conv_scale=conv_scale,
|
836 |
+
overlap_ratio=overlap_ratio,
|
837 |
+
mlp_ratio=self.mlp_ratio,
|
838 |
+
qkv_bias=qkv_bias,
|
839 |
+
qk_scale=qk_scale,
|
840 |
+
drop=drop_rate,
|
841 |
+
attn_drop=attn_drop_rate,
|
842 |
+
drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
|
843 |
+
norm_layer=norm_layer,
|
844 |
+
downsample=None,
|
845 |
+
use_checkpoint=use_checkpoint,
|
846 |
+
img_size=img_size,
|
847 |
+
patch_size=patch_size,
|
848 |
+
resi_connection=resi_connection)
|
849 |
+
self.layers.append(layer)
|
850 |
+
self.norm = norm_layer(self.num_features)
|
851 |
+
|
852 |
+
# build the last conv layer in deep feature extraction
|
853 |
+
if resi_connection == '1conv':
|
854 |
+
self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
|
855 |
+
elif resi_connection == 'identity':
|
856 |
+
self.conv_after_body = nn.Identity()
|
857 |
+
|
858 |
+
# ------------------------- 3, high quality image reconstruction ------------------------- #
|
859 |
+
if self.upsampler == 'pixelshuffle':
|
860 |
+
# for classical SR
|
861 |
+
self.conv_before_upsample = nn.Sequential(
|
862 |
+
nn.Conv2d(embed_dim, num_feat, 3, 1, 1), nn.LeakyReLU(inplace=True))
|
863 |
+
self.upsample = Upsample(upscale, num_feat)
|
864 |
+
self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
|
865 |
+
|
866 |
+
self.apply(self._init_weights)
|
867 |
+
|
868 |
+
def _init_weights(self, m):
|
869 |
+
if isinstance(m, nn.Linear):
|
870 |
+
trunc_normal_(m.weight, std=.02)
|
871 |
+
if isinstance(m, nn.Linear) and m.bias is not None:
|
872 |
+
nn.init.constant_(m.bias, 0)
|
873 |
+
elif isinstance(m, nn.LayerNorm):
|
874 |
+
nn.init.constant_(m.bias, 0)
|
875 |
+
nn.init.constant_(m.weight, 1.0)
|
876 |
+
|
877 |
+
def calculate_rpi_sa(self):
|
878 |
+
# calculate relative position index for SA
|
879 |
+
coords_h = torch.arange(self.window_size)
|
880 |
+
coords_w = torch.arange(self.window_size)
|
881 |
+
coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
|
882 |
+
coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
|
883 |
+
relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
|
884 |
+
relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
|
885 |
+
relative_coords[:, :, 0] += self.window_size - 1 # shift to start from 0
|
886 |
+
relative_coords[:, :, 1] += self.window_size - 1
|
887 |
+
relative_coords[:, :, 0] *= 2 * self.window_size - 1
|
888 |
+
relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
|
889 |
+
return relative_position_index
|
890 |
+
|
891 |
+
def calculate_rpi_oca(self):
|
892 |
+
# calculate relative position index for OCA
|
893 |
+
window_size_ori = self.window_size
|
894 |
+
window_size_ext = self.window_size + int(self.overlap_ratio * self.window_size)
|
895 |
+
|
896 |
+
coords_h = torch.arange(window_size_ori)
|
897 |
+
coords_w = torch.arange(window_size_ori)
|
898 |
+
coords_ori = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, ws, ws
|
899 |
+
coords_ori_flatten = torch.flatten(coords_ori, 1) # 2, ws*ws
|
900 |
+
|
901 |
+
coords_h = torch.arange(window_size_ext)
|
902 |
+
coords_w = torch.arange(window_size_ext)
|
903 |
+
coords_ext = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, wse, wse
|
904 |
+
coords_ext_flatten = torch.flatten(coords_ext, 1) # 2, wse*wse
|
905 |
+
|
906 |
+
relative_coords = coords_ext_flatten[:, None, :] - coords_ori_flatten[:, :, None] # 2, ws*ws, wse*wse
|
907 |
+
|
908 |
+
relative_coords = relative_coords.permute(1, 2, 0).contiguous() # ws*ws, wse*wse, 2
|
909 |
+
relative_coords[:, :, 0] += window_size_ori - window_size_ext + 1 # shift to start from 0
|
910 |
+
relative_coords[:, :, 1] += window_size_ori - window_size_ext + 1
|
911 |
+
|
912 |
+
relative_coords[:, :, 0] *= window_size_ori + window_size_ext - 1
|
913 |
+
relative_position_index = relative_coords.sum(-1)
|
914 |
+
return relative_position_index
|
915 |
+
|
916 |
+
def calculate_mask(self, x_size):
|
917 |
+
# calculate attention mask for SW-MSA
|
918 |
+
h, w = x_size
|
919 |
+
img_mask = torch.zeros((1, h, w, 1)) # 1 h w 1
|
920 |
+
h_slices = (slice(0, -self.window_size), slice(-self.window_size,
|
921 |
+
-self.shift_size), slice(-self.shift_size, None))
|
922 |
+
w_slices = (slice(0, -self.window_size), slice(-self.window_size,
|
923 |
+
-self.shift_size), slice(-self.shift_size, None))
|
924 |
+
cnt = 0
|
925 |
+
for h in h_slices:
|
926 |
+
for w in w_slices:
|
927 |
+
img_mask[:, h, w, :] = cnt
|
928 |
+
cnt += 1
|
929 |
+
|
930 |
+
mask_windows = window_partition(img_mask, self.window_size) # nw, window_size, window_size, 1
|
931 |
+
mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
|
932 |
+
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
|
933 |
+
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
|
934 |
+
|
935 |
+
return attn_mask
|
936 |
+
|
937 |
+
@torch.jit.ignore
|
938 |
+
def no_weight_decay(self):
|
939 |
+
return {'absolute_pos_embed'}
|
940 |
+
|
941 |
+
@torch.jit.ignore
|
942 |
+
def no_weight_decay_keywords(self):
|
943 |
+
return {'relative_position_bias_table'}
|
944 |
+
|
945 |
+
def forward_features(self, x):
|
946 |
+
x_size = (x.shape[2], x.shape[3])
|
947 |
+
|
948 |
+
# Calculate attention mask and relative position index in advance to speed up inference.
|
949 |
+
# The original code is very time-consuming for large window size.
|
950 |
+
attn_mask = self.calculate_mask(x_size).to(x.device)
|
951 |
+
params = {'attn_mask': attn_mask, 'rpi_sa': self.relative_position_index_SA, 'rpi_oca': self.relative_position_index_OCA}
|
952 |
+
|
953 |
+
x = self.patch_embed(x)
|
954 |
+
if self.ape:
|
955 |
+
x = x + self.absolute_pos_embed
|
956 |
+
x = self.pos_drop(x)
|
957 |
+
|
958 |
+
for layer in self.layers:
|
959 |
+
x = layer(x, x_size, params)
|
960 |
+
|
961 |
+
x = self.norm(x) # b seq_len c
|
962 |
+
x = self.patch_unembed(x, x_size)
|
963 |
+
|
964 |
+
return x
|
965 |
+
|
966 |
+
def forward(self, x):
|
967 |
+
self.mean = self.mean.type_as(x)
|
968 |
+
x = (x - self.mean) * self.img_range
|
969 |
+
|
970 |
+
if self.upsampler == 'pixelshuffle':
|
971 |
+
# for classical SR
|
972 |
+
x = self.conv_first(x)
|
973 |
+
x = self.conv_after_body(self.forward_features(x)) + x
|
974 |
+
x = self.conv_before_upsample(x)
|
975 |
+
x = self.conv_last(self.upsample(x))
|
976 |
+
|
977 |
+
x = x / self.img_range + self.mean
|
978 |
+
|
979 |
+
return x
|
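Note: a minimal sketch of how the HATArch added above can be exercised on a dummy tensor. The hyperparameters mirror SR_Inference/inference_hat.py from this commit; the 64x64 input size is an assumption, chosen to be divisible by the window size.

import torch
from SR_Inference.hat.hat_arch import HATArch

# Build the x2 configuration used elsewhere in this commit (random weights).
net = HATArch(
    img_size=64, upscale=2, in_chans=3, window_size=16,
    compress_ratio=3, squeeze_factor=30, conv_scale=0.01, overlap_ratio=0.5,
    img_range=1.0, depths=[6, 6, 6, 6, 6, 6], embed_dim=180,
    num_heads=[6, 6, 6, 6, 6, 6], mlp_ratio=2,
    upsampler="pixelshuffle", resi_connection="1conv",
)
net.eval()

with torch.no_grad():
    lr = torch.rand(1, 3, 64, 64)   # dummy low-resolution input, h and w divisible by 16
    sr = net(lr)                    # -> torch.Size([1, 3, 128, 128]) for upscale=2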
SR_Inference/hat/weights/HAT-L_SRx2_ImageNet-pretrain.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2818c7ca8d72ec4cc5f31c93203d55252a662dd35cda34ce1a69661f97dcd38f
size 165182573
SR_Inference/hat/weights/HAT_SRx2_ImageNet-pretrain.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:82ebd911263bcc886fbef46b30cf97b92a932a27a3cba30163d4577afb09b9d7
size 84546053
SR_Inference/hat/weights/HAT_SRx4_ImageNet-pretrain.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ee053c42461187846dc0e93aa5abd34591c0725a8e044a59000e92ee215e833
size 85137601
SR_Inference/inference_codeformer.py
ADDED
@@ -0,0 +1,126 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import sys
|
4 |
+
import torch
|
5 |
+
import os.path as osp
|
6 |
+
from basicsr.utils import img2tensor, tensor2img
|
7 |
+
from torchvision.transforms.functional import normalize
|
8 |
+
from facexlib.utils.face_restoration_helper import FaceRestoreHelper
|
9 |
+
|
10 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
11 |
+
|
12 |
+
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
|
13 |
+
sys.path.append(root_path)
|
14 |
+
from SR_Inference.codeformer.codeformer_arch import CodeFormerArch
|
15 |
+
from SR_Inference.inference_sr_utils import RealEsrUpsamplerZoo
|
16 |
+
|
17 |
+
|
18 |
+
class CodeFormer:
|
19 |
+
|
20 |
+
def __init__(
|
21 |
+
self,
|
22 |
+
upscale=2,
|
23 |
+
bg_upsampler_name="realesrgan",
|
24 |
+
prefered_net_in_upsampler="RRDBNet",
|
25 |
+
fidelity_weight=0.8,
|
26 |
+
):
|
27 |
+
|
28 |
+
self.upscale = int(upscale)
|
29 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
30 |
+
self.fidelity_weight = fidelity_weight
|
31 |
+
|
32 |
+
# ------------------------ set up background upsampler ------------------------
|
33 |
+
upsampler_zoo = RealEsrUpsamplerZoo(
|
34 |
+
upscale=self.upscale,
|
35 |
+
bg_upsampler_name=bg_upsampler_name,
|
36 |
+
prefered_net_in_upsampler=prefered_net_in_upsampler,
|
37 |
+
)
|
38 |
+
self.bg_upsampler = upsampler_zoo.bg_upsampler
|
39 |
+
|
40 |
+
# ------------------ set up FaceRestoreHelper -------------------
|
41 |
+
gfpgan_weights_path = os.path.join(
|
42 |
+
ROOT_DIR, "SR_Inference", "gfpgan", "weights"
|
43 |
+
)
|
44 |
+
self.face_restorer_helper = FaceRestoreHelper(
|
45 |
+
upscale_factor=self.upscale,
|
46 |
+
face_size=512,
|
47 |
+
crop_ratio=(1, 1),
|
48 |
+
det_model="retinaface_resnet50",
|
49 |
+
save_ext="png",
|
50 |
+
use_parse=True,
|
51 |
+
device=self.device,
|
52 |
+
# model_rootpath="gfpgan/weights",
|
53 |
+
model_rootpath=gfpgan_weights_path,
|
54 |
+
)
|
55 |
+
|
56 |
+
# ------------------ load model -------------------
|
57 |
+
self.sr_model = CodeFormerArch().to(self.device)
|
58 |
+
ckpt_path = os.path.join(
|
59 |
+
ROOT_DIR, "SR_Inference", "codeformer", "weights", "codeformer_v0.1.0.pth"
|
60 |
+
)
|
61 |
+
loadnet = torch.load(ckpt_path, map_location=self.device)
|
62 |
+
if "params_ema" in loadnet:
|
63 |
+
keyname = "params_ema"
|
64 |
+
else:
|
65 |
+
keyname = "params"
|
66 |
+
|
67 |
+
self.sr_model.load_state_dict(loadnet[keyname])
|
68 |
+
self.sr_model.eval()
|
69 |
+
|
70 |
+
@torch.no_grad()
|
71 |
+
def __call__(self, img):
|
72 |
+
|
73 |
+
bg_img = self.bg_upsampler.enhance(img, outscale=self.upscale)[0]
|
74 |
+
|
75 |
+
self.face_restorer_helper.clean_all()
|
76 |
+
self.face_restorer_helper.read_image(img)
|
77 |
+
self.face_restorer_helper.get_face_landmarks_5(
|
78 |
+
only_keep_largest=True, only_center_face=False, eye_dist_threshold=5
|
79 |
+
)
|
80 |
+
self.face_restorer_helper.align_warp_face()
|
81 |
+
|
82 |
+
if len(self.face_restorer_helper.cropped_faces) > 0:
|
83 |
+
|
84 |
+
cropped_face = self.face_restorer_helper.cropped_faces[0]
|
85 |
+
|
86 |
+
cropped_face_t = img2tensor(
|
87 |
+
imgs=cropped_face / 255.0, bgr2rgb=True, float32=True
|
88 |
+
)
|
89 |
+
normalize(
|
90 |
+
tensor=cropped_face_t,
|
91 |
+
mean=(0.5, 0.5, 0.5),
|
92 |
+
std=(0.5, 0.5, 0.5),
|
93 |
+
inplace=True,
|
94 |
+
)
|
95 |
+
cropped_face_t = cropped_face_t.unsqueeze(0).to(self.device)
|
96 |
+
|
97 |
+
# ------------------- restore/enhance image using CodeFormerArch model -------------------
|
98 |
+
output = self.sr_model(cropped_face_t, w=self.fidelity_weight, adain=True)[
|
99 |
+
0
|
100 |
+
]
|
101 |
+
|
102 |
+
restored_face = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
|
103 |
+
restored_face = restored_face.astype("uint8")
|
104 |
+
|
105 |
+
self.face_restorer_helper.add_restored_face(restored_face)
|
106 |
+
self.face_restorer_helper.get_inverse_affine(None)
|
107 |
+
|
108 |
+
sr_img = self.face_restorer_helper.paste_faces_to_input_image(
|
109 |
+
upsample_img=bg_img
|
110 |
+
)
|
111 |
+
else:
|
112 |
+
sr_img = bg_img
|
113 |
+
|
114 |
+
return sr_img
|
115 |
+
|
116 |
+
|
117 |
+
if __name__ == "__main__":
|
118 |
+
|
119 |
+
codeformer = CodeFormer(upscale=2, fidelity_weight=1.0)
|
120 |
+
|
121 |
+
img = cv2.imread(f"{ROOT_DIR}/data/EyeDentify/Wo_SR/original/1/1/frame_01.png")
|
122 |
+
sr_img = codeformer(img=img)
|
123 |
+
|
124 |
+
saving_dir = f"{ROOT_DIR}/rough_works/SR_imgs"
|
125 |
+
os.makedirs(saving_dir, exist_ok=True)
|
126 |
+
cv2.imwrite(f"{saving_dir}/sr_img.png", sr_img)
|
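Note: a hedged sketch of running the CodeFormer wrapper above over a folder of frames; the directory paths are placeholders, not paths from this repository.

import glob
import os
import cv2
from SR_Inference.inference_codeformer import CodeFormer

restorer = CodeFormer(upscale=2, fidelity_weight=0.8)

in_dir = "path/to/lr_frames"     # placeholder input folder
out_dir = "path/to/sr_frames"    # placeholder output folder
os.makedirs(out_dir, exist_ok=True)

for path in sorted(glob.glob(os.path.join(in_dir, "*.png"))):
    frame = cv2.imread(path)                 # BGR uint8, as the wrapper expects
    sr_frame = restorer(img=frame)
    cv2.imwrite(os.path.join(out_dir, os.path.basename(path)), sr_frame)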
SR_Inference/inference_gfpgan.py
ADDED
@@ -0,0 +1,76 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import sys
|
4 |
+
import torch
|
5 |
+
import os.path as osp
|
6 |
+
from gfpgan import GFPGANer
|
7 |
+
from basicsr.utils.download_util import load_file_from_url
|
8 |
+
|
9 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
10 |
+
|
11 |
+
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
|
12 |
+
sys.path.append(root_path)
|
13 |
+
from SR_Inference.inference_sr_utils import RealEsrUpsamplerZoo
|
14 |
+
|
15 |
+
|
16 |
+
class GFPGAN:
|
17 |
+
|
18 |
+
def __init__(
|
19 |
+
self,
|
20 |
+
upscale=2,
|
21 |
+
bg_upsampler_name="realesrgan",
|
22 |
+
prefered_net_in_upsampler="RRDBNet",
|
23 |
+
):
|
24 |
+
|
25 |
+
upscale = int(upscale)
|
26 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
27 |
+
|
28 |
+
# ------------------------ set up background upsampler ------------------------
|
29 |
+
upsampler_zoo = RealEsrUpsamplerZoo(
|
30 |
+
upscale=upscale,
|
31 |
+
bg_upsampler_name=bg_upsampler_name,
|
32 |
+
prefered_net_in_upsampler=prefered_net_in_upsampler,
|
33 |
+
)
|
34 |
+
bg_upsampler = upsampler_zoo.bg_upsampler
|
35 |
+
|
36 |
+
# ------------------------ load model ------------------------
|
37 |
+
gfpgan_weights_path = os.path.join(
|
38 |
+
ROOT_DIR, "SR_Inference", "gfpgan", "weights"
|
39 |
+
)
|
40 |
+
gfpgan_model_path = os.path.join(gfpgan_weights_path, "GFPGANv1.3.pth")
|
41 |
+
|
42 |
+
if not os.path.isfile(gfpgan_model_path):
|
43 |
+
url = "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth"
|
44 |
+
gfpgan_model_path = load_file_from_url(
|
45 |
+
url=url,
|
46 |
+
model_dir=gfpgan_weights_path,
|
47 |
+
progress=True,
|
48 |
+
file_name="GFPGANv1.3.pth",
|
49 |
+
)
|
50 |
+
|
51 |
+
self.sr_model = GFPGANer(
|
52 |
+
upscale=upscale,
|
53 |
+
bg_upsampler=bg_upsampler,
|
54 |
+
model_path=gfpgan_model_path,
|
55 |
+
device=device,
|
56 |
+
)
|
57 |
+
|
58 |
+
def __call__(self, img):
|
59 |
+
# ------------------------ restore/enhance image using GFPGAN model ------------------------
|
60 |
+
cropped_faces, sr_faces, sr_img = self.sr_model.enhance(img)
|
61 |
+
|
62 |
+
return sr_img
|
63 |
+
|
64 |
+
|
65 |
+
if __name__ == "__main__":
|
66 |
+
|
67 |
+
gfpgan = GFPGAN(
|
68 |
+
upscale=2, bg_upsampler_name="realesrgan", prefered_net_in_upsampler="RRDBNet"
|
69 |
+
)
|
70 |
+
|
71 |
+
img = cv2.imread(f"{ROOT_DIR}/data/EyeDentify/Wo_SR/original/1/1/frame_01.png")
|
72 |
+
sr_img = gfpgan(img=img)
|
73 |
+
|
74 |
+
saving_dir = f"{ROOT_DIR}/rough_works/SR_imgs"
|
75 |
+
os.makedirs(saving_dir, exist_ok=True)
|
76 |
+
cv2.imwrite(f"{saving_dir}/sr_img_gfpgan.png", sr_img)
|
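Note: the GFPGAN wrapper above takes BGR uint8 arrays (as returned by cv2.imread). A hedged sketch for feeding it a PIL image instead; the file names are placeholders.

import cv2
import numpy as np
from PIL import Image
from SR_Inference.inference_gfpgan import GFPGAN

gfpgan = GFPGAN(upscale=2)

pil_img = Image.open("face.png").convert("RGB")           # placeholder path
bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)  # PIL RGB -> OpenCV BGR
sr_bgr = gfpgan(img=bgr)
sr_rgb = cv2.cvtColor(sr_bgr, cv2.COLOR_BGR2RGB)
Image.fromarray(sr_rgb).save("face_sr.png")               # placeholder path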
SR_Inference/inference_hat.py
ADDED
@@ -0,0 +1,104 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import sys
|
4 |
+
import torch
|
5 |
+
import numpy as np
|
6 |
+
import os.path as osp
|
7 |
+
from PIL import Image
|
8 |
+
from basicsr.utils import img2tensor
|
9 |
+
|
10 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
11 |
+
|
12 |
+
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
|
13 |
+
sys.path.append(root_path)
|
14 |
+
from SR_Inference.hat.hat_arch import HATArch
|
15 |
+
|
16 |
+
|
17 |
+
class HAT:
|
18 |
+
|
19 |
+
def __init__(
|
20 |
+
self,
|
21 |
+
upscale=2,
|
22 |
+
in_chans=3,
|
23 |
+
img_size=(480, 640),
|
24 |
+
window_size=16,
|
25 |
+
compress_ratio=3,
|
26 |
+
squeeze_factor=30,
|
27 |
+
conv_scale=0.01,
|
28 |
+
overlap_ratio=0.5,
|
29 |
+
img_range=1.0,
|
30 |
+
depths=[6, 6, 6, 6, 6, 6],
|
31 |
+
embed_dim=180,
|
32 |
+
num_heads=[6, 6, 6, 6, 6, 6],
|
33 |
+
mlp_ratio=2,
|
34 |
+
upsampler="pixelshuffle",
|
35 |
+
resi_connection="1conv",
|
36 |
+
):
|
37 |
+
upscale = int(upscale)
|
38 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
39 |
+
|
40 |
+
# ------------------ load model for img enhancement -------------------
|
41 |
+
self.sr_model = HATArch(
|
42 |
+
img_size=img_size,
|
43 |
+
upscale=upscale,
|
44 |
+
in_chans=in_chans,
|
45 |
+
window_size=window_size,
|
46 |
+
compress_ratio=compress_ratio,
|
47 |
+
squeeze_factor=squeeze_factor,
|
48 |
+
conv_scale=conv_scale,
|
49 |
+
overlap_ratio=overlap_ratio,
|
50 |
+
img_range=img_range,
|
51 |
+
depths=depths,
|
52 |
+
embed_dim=embed_dim,
|
53 |
+
num_heads=num_heads,
|
54 |
+
mlp_ratio=mlp_ratio,
|
55 |
+
upsampler=upsampler,
|
56 |
+
resi_connection=resi_connection,
|
57 |
+
).to(self.device)
|
58 |
+
|
59 |
+
ckpt_path = os.path.join(
|
60 |
+
ROOT_DIR,
|
61 |
+
"SR_Inference",
|
62 |
+
"hat",
|
63 |
+
"weights",
|
64 |
+
f"HAT_SRx{str(upscale)}_ImageNet-pretrain.pth",
|
65 |
+
)
|
66 |
+
loadnet = torch.load(ckpt_path, map_location=self.device)
|
67 |
+
if "params_ema" in loadnet:
|
68 |
+
keyname = "params_ema"
|
69 |
+
else:
|
70 |
+
keyname = "params"
|
71 |
+
|
72 |
+
self.sr_model.load_state_dict(loadnet[keyname])
|
73 |
+
self.sr_model.eval()
|
74 |
+
|
75 |
+
@torch.no_grad()
|
76 |
+
def __call__(self, img):
|
77 |
+
img_tensor = (
|
78 |
+
img2tensor(imgs=img / 255.0, bgr2rgb=True, float32=True)
|
79 |
+
.unsqueeze(0)
|
80 |
+
.to(self.device)
|
81 |
+
)
|
82 |
+
restored_img = self.sr_model(img_tensor)[0]
|
83 |
+
restored_img = restored_img.permute(1, 2, 0).cpu().numpy()
|
84 |
+
restored_img = (restored_img - restored_img.min()) / (
|
85 |
+
restored_img.max() - restored_img.min()
|
86 |
+
)
|
87 |
+
restored_img = (restored_img * 255).astype(np.uint8)
|
88 |
+
restored_img = Image.fromarray(restored_img)
|
89 |
+
restored_img = np.array(restored_img)
|
90 |
+
sr_img = cv2.cvtColor(restored_img, cv2.COLOR_RGB2BGR)
|
91 |
+
|
92 |
+
return sr_img
|
93 |
+
|
94 |
+
|
95 |
+
if __name__ == "__main__":
|
96 |
+
|
97 |
+
hat = HAT(upscale=2)
|
98 |
+
|
99 |
+
img = cv2.imread(f"{ROOT_DIR}/data/EyeDentify/Wo_SR/original/1/1/frame_01.png")
|
100 |
+
sr_img = hat(img=img)
|
101 |
+
|
102 |
+
saving_dir = f"{ROOT_DIR}/rough_works/SR_imgs"
|
103 |
+
os.makedirs(saving_dir, exist_ok=True)
|
104 |
+
cv2.imwrite(f"{saving_dir}/sr_img_hat.png", sr_img)
|
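Note: the HATArch window partitioning assumes the frame height and width are divisible by the window size (16 in this wrapper). A hedged sketch that pads the frame before calling the wrapper and crops afterwards; the file names are placeholders.

import cv2
from SR_Inference.inference_hat import HAT

hat = HAT(upscale=2, window_size=16)

img = cv2.imread("frame.png")                 # placeholder path
h, w = img.shape[:2]
pad_h = (16 - h % 16) % 16
pad_w = (16 - w % 16) % 16
padded = cv2.copyMakeBorder(img, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT)

sr = hat(img=padded)[: 2 * h, : 2 * w]        # crop back to 2x the original size
cv2.imwrite("frame_sr.png", sr)               # placeholder path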
SR_Inference/inference_realesr.py
ADDED
@@ -0,0 +1,52 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import sys
|
4 |
+
import torch
|
5 |
+
import os.path as osp
|
6 |
+
|
7 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
8 |
+
|
9 |
+
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
|
10 |
+
sys.path.append(root_path)
|
11 |
+
from SR_Inference.inference_sr_utils import RealEsrUpsamplerZoo
|
12 |
+
|
13 |
+
|
14 |
+
class RealEsr:
|
15 |
+
|
16 |
+
def __init__(
|
17 |
+
self,
|
18 |
+
upscale=2,
|
19 |
+
bg_upsampler_name="realesrgan",
|
20 |
+
prefered_net_in_upsampler="RRDBNet",
|
21 |
+
):
|
22 |
+
|
23 |
+
self.upscale = int(upscale)
|
24 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
25 |
+
|
26 |
+
# ------------------------ set up background upsampler ------------------------
|
27 |
+
self.upsampler_zoo = RealEsrUpsamplerZoo(
|
28 |
+
upscale=self.upscale,
|
29 |
+
bg_upsampler_name=bg_upsampler_name,
|
30 |
+
prefered_net_in_upsampler=prefered_net_in_upsampler,
|
31 |
+
)
|
32 |
+
self.bg_upsampler = self.upsampler_zoo.bg_upsampler
|
33 |
+
|
34 |
+
def __call__(self, img):
|
35 |
+
# ---------------- restore/enhance image using the selected RealESR model ----------------
|
36 |
+
sr_img = self.bg_upsampler.enhance(img, outscale=self.upscale)[0]
|
37 |
+
|
38 |
+
return sr_img
|
39 |
+
|
40 |
+
|
41 |
+
if __name__ == "__main__":
|
42 |
+
|
43 |
+
realesr = RealEsr(
|
44 |
+
upscale=2, bg_upsampler_name="realesrgan", prefered_net_in_upsampler="RRDBNet"
|
45 |
+
)
|
46 |
+
|
47 |
+
img = cv2.imread(f"{ROOT_DIR}/data/EyeDentify/Wo_SR/original/1/1/frame_01.png")
|
48 |
+
sr_img = realesr(img=img)
|
49 |
+
|
50 |
+
saving_dir = f"{ROOT_DIR}/rough_works/SR_imgs"
|
51 |
+
os.makedirs(saving_dir, exist_ok=True)
|
52 |
+
cv2.imwrite(f"{saving_dir}/sr_img.png", sr_img)
|
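Note: per RealEsrUpsamplerZoo below, the "realesrgan" backend only registers x2 and x4 weights in this commit; other scales raise an exception. A small hedged sketch of that behaviour.

from SR_Inference.inference_realesr import RealEsr

# Only x2 and x4 have registered RealESRGAN weights here; x3 is rejected.
try:
    RealEsr(upscale=3, bg_upsampler_name="realesrgan")
except Exception as err:
    print(f"unsupported scale: {err}")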
SR_Inference/inference_sr_utils.py
ADDED
@@ -0,0 +1,101 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import torch
|
4 |
+
from realesrgan import RealESRGANer
|
5 |
+
from basicsr.archs.rrdbnet_arch import RRDBNet
|
6 |
+
from realesrgan.archs.srvgg_arch import SRVGGNetCompact
|
7 |
+
from basicsr.utils.download_util import load_file_from_url
|
8 |
+
|
9 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
10 |
+
|
11 |
+
|
12 |
+
class RealEsrUpsamplerZoo:
|
13 |
+
|
14 |
+
def __init__(
|
15 |
+
self,
|
16 |
+
upscale=2,
|
17 |
+
bg_upsampler_name="realesrgan",
|
18 |
+
prefered_net_in_upsampler="RRDBNet",
|
19 |
+
):
|
20 |
+
|
21 |
+
self.upscale = int(upscale)
|
22 |
+
|
23 |
+
# ------------------------ set up background upsampler ------------------------
|
24 |
+
weights_path = os.path.join(
|
25 |
+
ROOT_DIR, "SR_Inference", f"{bg_upsampler_name}", "weights"
|
26 |
+
)
|
27 |
+
|
28 |
+
if bg_upsampler_name == "realesrgan":
|
29 |
+
model = self.get_prefered_net(prefered_net_in_upsampler, upscale)
|
30 |
+
if self.upscale == 2:
|
31 |
+
model_path = os.path.join(weights_path, "RealESRGAN_x2plus.pth")
|
32 |
+
url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth"
|
33 |
+
elif self.upscale == 4:
|
34 |
+
model_path = os.path.join(weights_path, "RealESRGAN_x4plus.pth")
|
35 |
+
url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth"
|
36 |
+
else:
|
37 |
+
raise Exception(
|
38 |
+
f"{bg_upsampler_name} model not available for upscaling x{str(self.upscale)}"
|
39 |
+
)
|
40 |
+
elif bg_upsampler_name == "realesrnet":
|
41 |
+
model = self.get_prefered_net(prefered_net_in_upsampler, upscale)
|
42 |
+
if self.upscale == 4:
|
43 |
+
model_path = os.path.join(weights_path, "RealESRNet_x4plus.pth")
|
44 |
+
url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth"
|
45 |
+
else:
|
46 |
+
raise Exception(
|
47 |
+
f"{bg_upsampler_name} model not available for upscaling x{str(self.upscale)}"
|
48 |
+
)
|
49 |
+
elif bg_upsampler_name == "anime":
|
50 |
+
model = self.get_prefered_net(prefered_net_in_upsampler, upscale)
|
51 |
+
if self.upscale == 4:
|
52 |
+
model_path = os.path.join(
|
53 |
+
weights_path, "RealESRGAN_x4plus_anime_6B.pth"
|
54 |
+
)
|
55 |
+
url = "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth"
|
56 |
+
else:
|
57 |
+
raise Exception(
|
58 |
+
f"{bg_upsampler_name} model not available for upscaling x{str(self.upscale)}"
|
59 |
+
)
|
60 |
+
else:
|
61 |
+
raise Exception(f"No model implemented for: {bg_upsampler_name}")
|
62 |
+
|
63 |
+
# ------------------------ load background upsampler model ------------------------
|
64 |
+
if not os.path.isfile(model_path):
|
65 |
+
model_path = load_file_from_url(
|
66 |
+
url=url, model_dir=weights_path, progress=True, file_name=None
|
67 |
+
)
|
68 |
+
|
69 |
+
self.bg_upsampler = RealESRGANer(
|
70 |
+
scale=int(upscale),
|
71 |
+
model_path=model_path,
|
72 |
+
model=model,
|
73 |
+
tile=0,
|
74 |
+
tile_pad=0,
|
75 |
+
pre_pad=0,
|
76 |
+
half=False,
|
77 |
+
)
|
78 |
+
|
79 |
+
@staticmethod
|
80 |
+
def get_prefered_net(prefered_net_in_upsampler, upscale=2):
|
81 |
+
if prefered_net_in_upsampler == "RRDBNet":
|
82 |
+
model = RRDBNet(
|
83 |
+
num_in_ch=3,
|
84 |
+
num_out_ch=3,
|
85 |
+
num_feat=64,
|
86 |
+
num_block=23,
|
87 |
+
num_grow_ch=32,
|
88 |
+
scale=int(upscale),
|
89 |
+
)
|
90 |
+
elif prefered_net_in_upsampler == "SRVGGNetCompact":
|
91 |
+
model = SRVGGNetCompact(
|
92 |
+
num_in_ch=3,
|
93 |
+
num_out_ch=3,
|
94 |
+
num_feat=64,
|
95 |
+
num_conv=16,
|
96 |
+
upscale=int(upscale),
|
97 |
+
act_type="prelu",
|
98 |
+
)
|
99 |
+
else:
|
100 |
+
raise Exception(f"No net named: {prefered_net_in_upsampler} implemented!")
|
101 |
+
return model
|
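Note: a hedged sketch using RealEsrUpsamplerZoo directly, without one of the wrapper classes above; RealESRGANer.enhance returns the upscaled image plus the detected image mode. The file names are placeholders.

import cv2
from SR_Inference.inference_sr_utils import RealEsrUpsamplerZoo

zoo = RealEsrUpsamplerZoo(
    upscale=2,
    bg_upsampler_name="realesrgan",
    prefered_net_in_upsampler="RRDBNet",
)

img = cv2.imread("frame.png")                        # placeholder path
sr_img, img_mode = zoo.bg_upsampler.enhance(img, outscale=2)
cv2.imwrite("frame_x2.png", sr_img)                  # placeholder path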
SR_Inference/inference_srresnet.py
ADDED
@@ -0,0 +1,78 @@
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
import sys
|
4 |
+
import torch
|
5 |
+
import numpy as np
|
6 |
+
import os.path as osp
|
7 |
+
from PIL import Image
|
8 |
+
from basicsr.utils import img2tensor
|
9 |
+
from basicsr.archs.srresnet_arch import MSRResNet
|
10 |
+
|
11 |
+
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
12 |
+
|
13 |
+
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
|
14 |
+
sys.path.append(root_path)
|
15 |
+
|
16 |
+
|
17 |
+
class SRResNet:
|
18 |
+
|
19 |
+
def __init__(self, upscale=2, num_in_ch=3, num_out_ch=3, num_feat=64, num_block=16):
|
20 |
+
|
21 |
+
self.upscale = int(upscale)
|
22 |
+
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
23 |
+
|
24 |
+
# ------------------ load model for img enhancement -------------------
|
25 |
+
self.sr_model = MSRResNet(
|
26 |
+
upscale=self.upscale,
|
27 |
+
num_in_ch=num_in_ch,
|
28 |
+
num_out_ch=num_out_ch,
|
29 |
+
num_feat=num_feat,
|
30 |
+
num_block=num_block,
|
31 |
+
).to(self.device)
|
32 |
+
|
33 |
+
ckpt_path = os.path.join(
|
34 |
+
ROOT_DIR,
|
35 |
+
"SR_Inference",
|
36 |
+
"srresnet",
|
37 |
+
"weights",
|
38 |
+
f"SRResNet_{str(self.upscale)}x.pth",
|
39 |
+
)
|
40 |
+
loadnet = torch.load(ckpt_path, map_location=self.device)
|
41 |
+
if "params_ema" in loadnet:
|
42 |
+
keyname = "params_ema"
|
43 |
+
else:
|
44 |
+
keyname = "params"
|
45 |
+
|
46 |
+
self.sr_model.load_state_dict(loadnet[keyname])
|
47 |
+
self.sr_model.eval()
|
48 |
+
|
49 |
+
@torch.no_grad()
|
50 |
+
def __call__(self, img):
|
51 |
+
img_tensor = (
|
52 |
+
img2tensor(imgs=img / 255.0, bgr2rgb=True, float32=True)
|
53 |
+
.unsqueeze(0)
|
54 |
+
.to(self.device)
|
55 |
+
)
|
56 |
+
restored_img = self.sr_model(img_tensor)[0]
|
57 |
+
restored_img = restored_img.permute(1, 2, 0).cpu().numpy()
|
58 |
+
restored_img = (restored_img - restored_img.min()) / (
|
59 |
+
restored_img.max() - restored_img.min()
|
60 |
+
)
|
61 |
+
restored_img = (restored_img * 255).astype(np.uint8)
|
62 |
+
restored_img = Image.fromarray(restored_img)
|
63 |
+
restored_img = np.array(restored_img)
|
64 |
+
sr_img = cv2.cvtColor(restored_img, cv2.COLOR_RGB2BGR)
|
65 |
+
|
66 |
+
return sr_img
|
67 |
+
|
68 |
+
|
69 |
+
if __name__ == "__main__":
|
70 |
+
|
71 |
+
srresnet = SRResNet(upscale=2)
|
72 |
+
|
73 |
+
img = cv2.imread(f"{ROOT_DIR}/data/EyeDentify/Wo_SR/original/1/1/frame_01.png")
|
74 |
+
sr_img = srresnet(img=img)
|
75 |
+
|
76 |
+
saving_dir = f"{ROOT_DIR}/rough_works/SR_imgs"
|
77 |
+
os.makedirs(saving_dir, exist_ok=True)
|
78 |
+
cv2.imwrite(f"{saving_dir}/sr_img_srresnet.png", sr_img)
|
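Note: a hedged sketch comparing the SRResNet wrapper above against plain bicubic upscaling of the same frame; the file names are placeholders.

import cv2
from SR_Inference.inference_srresnet import SRResNet

srresnet = SRResNet(upscale=2)

img = cv2.imread("frame.png")                        # placeholder path
sr_img = srresnet(img=img)
bicubic = cv2.resize(
    img, (img.shape[1] * 2, img.shape[0] * 2), interpolation=cv2.INTER_CUBIC
)
cv2.imwrite("frame_srresnet_x2.png", sr_img)         # placeholder path
cv2.imwrite("frame_bicubic_x2.png", bicubic)         # placeholder path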
SR_Inference/realesrgan/weights/RealESRGAN_x2plus.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:49fafd45f8fd7aa8d31ab2a22d14d91b536c34494a5cfe31eb5d89c2fa266abb
size 67061725
SR_Inference/realesrgan/weights/RealESRGAN_x4plus.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4fa0d38905f75ac06eb49a7951b426670021be3018265fd191d2125df9d682f1
size 67040989
SR_Inference/srresnet/weights/SRResNet_2x.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4d2a531ecd6e8f15cc5eccf4bca58cdfea69f76c09baa1b208694977b0f6f5e
size 5492202
SR_Inference/srresnet/weights/SRResNet_4x.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:112f2ec947bb497b0350b149a1e06c3f73de77497cec64ce0c7ba268b8398023
size 6083374
app.py
CHANGED
@@ -4,8 +4,9 @@
|
|
4 |
from io import BytesIO
|
5 |
import os
|
6 |
import sys
|
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
-
import
|
9 |
import streamlit as st
|
10 |
import torch
|
11 |
from PIL import Image
|
@@ -21,6 +22,7 @@ import os.path as osp
|
|
21 |
root_path = osp.abspath(osp.join(__file__, osp.pardir))
|
22 |
sys.path.append(root_path)
|
23 |
|
|
|
24 |
from utils import get_model
|
25 |
from registry_utils import import_registered_modules
|
26 |
|
@@ -39,15 +41,13 @@ CAM_METHODS = [
|
|
39 |
# "LayerCAM",
|
40 |
]
|
41 |
TV_MODELS = [
|
42 |
-
"
|
43 |
-
|
44 |
-
]
|
45 |
-
SR_METHODS = ["GFPGAN", "RealESRGAN", "SRResNet", "CodeFormer", "HAT"]
|
46 |
-
UPSCALE = ["2", "3", "4"]
|
47 |
-
LABEL_MAP = [
|
48 |
-
"left_eye",
|
49 |
-
"right_eye",
|
50 |
]
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
@torch.no_grad()
|
@@ -79,150 +79,287 @@ def main():
|
|
79 |
|
80 |
# Sidebar
|
81 |
# File selection
|
82 |
-
st.sidebar.title("
|
83 |
# Disabling warning
|
84 |
st.set_option("deprecation.showfileUploaderEncoding", False)
|
85 |
# Choose your own image
|
86 |
uploaded_file = st.sidebar.file_uploader(
|
87 |
-
"Upload
|
88 |
)
|
89 |
if uploaded_file is not None:
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
# Model selection
|
95 |
-
st.sidebar.title("Setup")
|
96 |
tv_model = st.sidebar.selectbox(
|
97 |
"Classification model",
|
98 |
TV_MODELS,
|
99 |
-
help="Supported
|
100 |
)
|
101 |
|
102 |
-
|
103 |
-
#
|
104 |
-
#
|
105 |
-
#
|
106 |
-
# "
|
|
|
|
|
|
|
|
|
|
|
107 |
# )
|
108 |
|
109 |
-
img_configs = {"img_size": [32, 64], "means": None, "stds": None}
|
110 |
-
# For newline
|
111 |
st.sidebar.write("\n")
|
112 |
|
113 |
-
if st.sidebar.button("Compute CAM"):
|
114 |
if uploaded_file is None:
|
115 |
st.sidebar.error("Please upload an image first")
|
116 |
|
117 |
else:
|
118 |
with st.spinner("Analyzing..."):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
)
|
|
|
131 |
|
132 |
-
|
133 |
-
|
134 |
-
if means is not None and stds is not None:
|
135 |
-
preprocess_steps.append(transforms.Normalize(means, stds))
|
136 |
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
"model_path": root_path
|
143 |
-
+ "/pre_trained_models/ResNet18/left_eye.pt",
|
144 |
-
"registered_model_name": "ResNet18",
|
145 |
-
"num_classes": 1,
|
146 |
-
}
|
147 |
-
registered_model_name = model_configs["registered_model_name"]
|
148 |
-
# default_layer = ""
|
149 |
-
if tv_model is not None:
|
150 |
-
with st.spinner("Loading model..."):
|
151 |
-
model = _load_model(model_configs)
|
152 |
-
|
153 |
-
if torch.cuda.is_available():
|
154 |
-
model = model.cuda()
|
155 |
-
|
156 |
-
if registered_model_name == "ResNet18":
|
157 |
-
target_layer = model.resnet.layer4[-1].conv2
|
158 |
-
elif registered_model_name == "ResNet50":
|
159 |
-
target_layer = model.resnet.layer4[-1].conv3
|
160 |
-
else:
|
161 |
-
raise Exception(
|
162 |
-
f"No target layer available for selected model: {registered_model_name}"
|
163 |
)
|
164 |
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
#
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
#
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
|
|
|
|
|
|
190 |
)
|
191 |
-
# with torch.no_grad():
|
192 |
-
# if input_mask is not None:
|
193 |
-
# out = self.model(input_img, input_mask)
|
194 |
-
# else:
|
195 |
-
# out = self.model(input_img)
|
196 |
-
# activation_map = cam_extractor(class_idx=target_class)
|
197 |
-
|
198 |
-
# Forward the image to the model
|
199 |
-
out = model(input_img)
|
200 |
-
print("out = ", out)
|
201 |
-
|
202 |
-
# Select the target class
|
203 |
-
# if class_selection == "Predicted class (argmax)":
|
204 |
-
# class_idx = out.squeeze(0).argmax().item()
|
205 |
-
# else:
|
206 |
-
# class_idx = LABEL_MAP.index(class_selection.rpartition(" - ")[-1])
|
207 |
-
|
208 |
-
# Retrieve the CAM
|
209 |
-
# act_maps = cam_extractor(class_idx=target_class)
|
210 |
-
act_maps = cam_extractor(0, out)
|
211 |
-
# Fuse the CAMs if there are several
|
212 |
-
activation_map = (
|
213 |
-
act_maps[0]
|
214 |
-
if len(act_maps) == 1
|
215 |
-
else cam_extractor.fuse_cams(act_maps)
|
216 |
-
)
|
217 |
-
|
218 |
-
# Overlayed CAM
|
219 |
-
fig, ax = plt.subplots()
|
220 |
-
result = overlay_mask(
|
221 |
-
img, to_pil_image(activation_map, mode="F"), alpha=0.5
|
222 |
-
)
|
223 |
-
ax.imshow(result)
|
224 |
-
ax.axis("off")
|
225 |
-
cols[-1].pyplot(fig)
|
226 |
|
227 |
|
228 |
if __name__ == "__main__":
|
|
|
4 |
from io import BytesIO
|
5 |
import os
|
6 |
import sys
|
7 |
+
import cv2
|
8 |
import matplotlib.pyplot as plt
|
9 |
+
import numpy as np
|
10 |
import streamlit as st
|
11 |
import torch
|
12 |
from PIL import Image
|
|
|
22 |
root_path = osp.abspath(osp.join(__file__, osp.pardir))
|
23 |
sys.path.append(root_path)
|
24 |
|
25 |
+
from preprocessing.dataset_creation import EyeDentityDatasetCreation
|
26 |
from utils import get_model
|
27 |
from registry_utils import import_registered_modules
|
28 |
|
|
|
41 |
# "LayerCAM",
|
42 |
]
|
43 |
TV_MODELS = [
|
44 |
+
"ResNet18",
|
45 |
+
"ResNet50",
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
]
|
47 |
+
SR_METHODS = ["GFPGAN", "CodeFormer", "RealESRGAN", "SRResNet", "HAT"]
|
48 |
+
UPSCALE = [2, 4]
|
49 |
+
UPSCALE_METHODS = ["BILINEAR", "BICUBIC"]
|
50 |
+
LABEL_MAP = ["left_pupil", "right_pupil"]
|
51 |
|
52 |
|
53 |
@torch.no_grad()
|
|
|
79 |
|
80 |
# Sidebar
|
81 |
# File selection
|
82 |
+
st.sidebar.title("Upload Face or Eye")
|
83 |
# Disabling warning
|
84 |
st.set_option("deprecation.showfileUploaderEncoding", False)
|
85 |
# Choose your own image
|
86 |
uploaded_file = st.sidebar.file_uploader(
|
87 |
+
"Upload Image", type=["png", "jpeg", "jpg"]
|
88 |
)
|
89 |
if uploaded_file is not None:
|
90 |
+
input_img = Image.open(BytesIO(uploaded_file.read()), mode="r").convert("RGB")
|
91 |
+
# print("input_img before = ", input_img.size)
|
92 |
+
max_size = [input_img.size[0], input_img.size[1]]
|
93 |
+
cols[0].text(f"Input Image: {max_size[0]} x {max_size[1]}")
|
94 |
+
if input_img.size[0] == input_img.size[1] and input_img.size[0] >= 256:
|
95 |
+
max_size[0] = 256
|
96 |
+
max_size[1] = 256
|
97 |
+
else:
|
98 |
+
if input_img.size[0] >= 640:
|
99 |
+
max_size[0] = 640
|
100 |
+
elif input_img.size[0] < 64:
|
101 |
+
max_size[0] = 64
|
102 |
+
if input_img.size[1] >= 480:
|
103 |
+
max_size[1] = 480
|
104 |
+
elif input_img.size[1] < 32:
|
105 |
+
max_size[1] = 32
|
106 |
+
input_img.thumbnail((max_size[0], max_size[1])) # Bicubic resampling
|
107 |
+
# print("input_img after = ", input_img.size)
|
108 |
+
# cols[0].image(input_img)
|
109 |
+
fig0, axs0 = plt.subplots(1, 1, figsize=(10, 10))
|
110 |
+
# Display the input image
|
111 |
+
axs0.imshow(input_img)
|
112 |
+
axs0.axis("off")
|
113 |
+
axs0.set_title("Input Image")
|
114 |
+
|
115 |
+
# Display the plot
|
116 |
+
cols[0].pyplot(fig0)
|
117 |
+
cols[0].text(f"Input Image Resized: {max_size[0]} x {max_size[1]}")
|
118 |
+
|
119 |
+
st.sidebar.title("Setup")
|
120 |
|
121 |
+
# Upscale selection
|
122 |
+
upscale = "-"
|
123 |
+
# upscale = st.sidebar.selectbox(
|
124 |
+
# "Upscale",
|
125 |
+
# ["-"] + UPSCALE,
|
126 |
+
# help="Upscale the uploaded image 2 or 4 times. Keep blank for no upscaling",
|
127 |
+
# )
|
128 |
+
|
129 |
+
# Upscale method selection
|
130 |
+
if upscale != "-":
|
131 |
+
upscale_method_or_model = st.sidebar.selectbox(
|
132 |
+
"Upscale Method / Model",
|
133 |
+
UPSCALE_METHODS + SR_METHODS,
|
134 |
+
help="Select a method or model to upscale the uploaded image",
|
135 |
+
)
|
136 |
+
else:
|
137 |
+
upscale_method_or_model = None
|
138 |
+
|
139 |
+
# Pupil selection
|
140 |
+
pupil_selection = st.sidebar.selectbox(
|
141 |
+
"Pupil Selection",
|
142 |
+
["-"] + LABEL_MAP,
|
143 |
+
help="Select left or right pupil OR keep blank for both pupil diameter estimation",
|
144 |
+
)
|
145 |
|
146 |
# Model selection
|
|
|
147 |
tv_model = st.sidebar.selectbox(
|
148 |
"Classification model",
|
149 |
TV_MODELS,
|
150 |
+
help="Supported Models for Pupil Diameter Estimation",
|
151 |
)
|
152 |
|
153 |
+
cam_method = "CAM"
|
154 |
+
# cam_method = st.sidebar.selectbox(
|
155 |
+
# "CAM method",
|
156 |
+
# CAM_METHODS,
|
157 |
+
# help="The way your class activation map will be computed",
|
158 |
+
# )
|
159 |
+
# target_layer = st.sidebar.text_input(
|
160 |
+
# "Target layer",
|
161 |
+
# default_layer,
|
162 |
+
# help='If you want to target several layers, add a "+" separator (e.g. "layer3+layer4")',
|
163 |
# )
|
164 |
|
|
|
|
|
165 |
st.sidebar.write("\n")
|
166 |
|
167 |
+
if st.sidebar.button("Predict Diameter & Compute CAM"):
    if uploaded_file is None:
        st.sidebar.error("Please upload an image first")

    else:
        with st.spinner("Analyzing..."):
            if upscale == "-":
                sr_configs = None
            else:
                sr_configs = {
                    "method": upscale_method_or_model,
                    "params": {"upscale": upscale},
                }
            config_file = {
                "sr_configs": sr_configs,
                "feature_extraction_configs": {
                    "blink_detection": False,
                    "upscale": upscale,
                    "extraction_library": "mediapipe",
                },
            }

            img = np.array(input_img)
            # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            # if img.shape[0] > max_size or img.shape[1] > max_size:
            # img = cv2.resize(img, (max_size, max_size))

            ds_results = EyeDentityDatasetCreation(
                feature_extraction_configs=config_file[
                    "feature_extraction_configs"
                ],
                sr_configs=config_file["sr_configs"],
            )(img)
            # if ds_results is not None:
            # print("ds_results = ", ds_results.keys())

            preprocess_steps = [
                transforms.ToTensor(),
                transforms.Resize(
                    [32, 64],
                    # interpolation=transforms.InterpolationMode.BILINEAR,
                    interpolation=transforms.InterpolationMode.BICUBIC,
                    antialias=True,
                ),
            ]
            preprocess_function = transforms.Compose(preprocess_steps)

            left_eye = None
            right_eye = None

            if ds_results is None:
                # print("type of input_img = ", type(input_img))
                input_img = preprocess_function(input_img)
                input_img = input_img.unsqueeze(0)
                if pupil_selection == "left_pupil":
                    left_eye = input_img
                elif pupil_selection == "right_pupil":
                    right_eye = input_img
                else:
                    left_eye = input_img
                    right_eye = input_img
                # print("type of left_eye = ", type(left_eye))
                # print("type of right_eye = ", type(right_eye))
            elif "eyes" in ds_results.keys():
                if (
                    "left_eye" in ds_results["eyes"].keys()
                    and ds_results["eyes"]["left_eye"] is not None
                ):
                    left_eye = ds_results["eyes"]["left_eye"]
                    # print("type of left_eye = ", type(left_eye))
                    left_eye = to_pil_image(left_eye).convert("RGB")
                    # print("type of left_eye = ", type(left_eye))

                    left_eye = preprocess_function(left_eye)
                    # print("type of left_eye = ", type(left_eye))

                    left_eye = left_eye.unsqueeze(0)
                if (
                    "right_eye" in ds_results["eyes"].keys()
                    and ds_results["eyes"]["right_eye"] is not None
                ):
                    right_eye = ds_results["eyes"]["right_eye"]
                    # print("type of right_eye = ", type(right_eye))
                    right_eye = to_pil_image(right_eye).convert("RGB")
                    # print("type of right_eye = ", type(right_eye))

                    right_eye = preprocess_function(right_eye)
                    # print("type of right_eye = ", type(right_eye))

                    right_eye = right_eye.unsqueeze(0)
            else:
                # print("type of input_img = ", type(input_img))
                input_img = preprocess_function(input_img)
                input_img = input_img.unsqueeze(0)
                if pupil_selection == "left_pupil":
                    left_eye = input_img
                elif pupil_selection == "right_pupil":
                    right_eye = input_img
                else:
                    left_eye = input_img
                    right_eye = input_img
                # print("type of left_eye = ", type(left_eye))
                # print("type of right_eye = ", type(right_eye))

            # print("left_eye = ", left_eye.shape)
            # print("right_eye = ", right_eye.shape)

            if pupil_selection == "-":
                selected_eyes = ["left_eye", "right_eye"]
            elif pupil_selection == "left_pupil":
                selected_eyes = ["left_eye"]
            elif pupil_selection == "right_pupil":
                selected_eyes = ["right_eye"]

            for eye_type in selected_eyes:

                model_configs = {
                    "model_path": root_path
                    + f"/pre_trained_models/{tv_model}/{eye_type}.pt",
                    "registered_model_name": tv_model,
                    "num_classes": 1,
                }
                registered_model_name = model_configs["registered_model_name"]
                model = _load_model(model_configs)

                if registered_model_name == "ResNet18":
                    target_layer = model.resnet.layer4[-1].conv2
                elif registered_model_name == "ResNet50":
                    target_layer = model.resnet.layer4[-1].conv3
                else:
                    raise Exception(
                        f"No target layer available for selected model: {registered_model_name}"
                    )

                if left_eye is not None and eye_type == "left_eye":
                    input_img = left_eye
                elif right_eye is not None and eye_type == "right_eye":
                    input_img = right_eye
                else:
                    raise Exception("Wrong Data")

                if cam_method is not None:
                    cam_extractor = torchcam_methods.__dict__[cam_method](
                        model,
                        target_layer=target_layer,
                        fc_layer=model.resnet.fc,
                        input_shape=input_img.shape,
                    )

                # with torch.no_grad():
                out = model(input_img)
                cols[-1].markdown(
                    f"<h3>Predicted Pupil Diameter: {out[0].item():.2f} mm</h3>",
                    unsafe_allow_html=True,
                )
                # cols[-1].text(f"Predicted Pupil Diameter: {out[0].item():.2f}")

                # Retrieve the CAM
                act_maps = cam_extractor(0, out)

                # Fuse the CAMs if there are several
                activation_map = (
                    act_maps[0]
                    if len(act_maps) == 1
                    else cam_extractor.fuse_cams(act_maps)
                )

                # Convert input image and activation map to PIL images
                input_image_pil = to_pil_image(input_img.squeeze(0))
                activation_map_pil = to_pil_image(activation_map, mode="F")

                # Create the overlayed CAM result
                result = overlay_mask(
                    input_image_pil,
                    activation_map_pil,
                    alpha=0.5,
                )

                # Create a subplot with 1 row and 2 columns
                fig, axs = plt.subplots(1, 2, figsize=(10, 5))

                # Display the input image
                axs[0].imshow(input_image_pil)
                axs[0].axis("off")
                axs[0].set_title("Input Image")

                # Display the overlayed CAM result
                axs[1].imshow(result)
                axs[1].axis("off")
                axs[1].set_title("Overlayed CAM")

                # Display the plot
                cols[-1].pyplot(fig)
                cols[-1].text(
                    f"eye image size: {input_img.shape[-1]} x {input_img.shape[-2]}"
                )


if __name__ == "__main__":
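The CAM block above follows torchcam's usual extract-then-overlay flow. A minimal standalone sketch of that flow is below, using torchcam's GradCAM with a plain torchvision ResNet18 regressor as a stand-in for the registered pupil models; the model, target layer, and input size here are illustrative assumptions, not taken from the commit.

# Minimal sketch of the CAM step, assuming torchcam and a stand-in ResNet18 regressor.
import torch
from torchvision.models import resnet18
from torchvision.transforms.functional import to_pil_image
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask

model = resnet18(num_classes=1).eval()                 # stand-in for the pupil-diameter model
cam_extractor = GradCAM(model, target_layer=model.layer4[-1].conv2)

x = torch.rand(1, 3, 32, 64)                           # eye crop resized to 32x64, as in app.py
out = model(x)                                         # predicted diameter, shape (1, 1)
act_map = cam_extractor(0, out)[0]                     # CAM for the single regression output
overlay = overlay_mask(to_pil_image(x.squeeze(0)),
                       to_pil_image(act_map, mode="F"), alpha=0.5)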
config.yml
CHANGED
@@ -19,9 +19,9 @@ xai_configs:
     "InputXGradient",
     "GuidedBackprop",
     "Deconvolution",
-    "GuidedGradCam",
-    "LayerGradCam",
-    "LayerGradientXActivation",
+    # "GuidedGradCam",
+    # "LayerGradCam",
+    # "LayerGradientXActivation",
   ]
   cam_methods: [
     "CAM",
feature_extraction/extractor_mediapipe.py
ADDED
@@ -0,0 +1,365 @@
import cv2
import torch
import warnings
import numpy as np
from PIL import Image
from math import sqrt
import mediapipe as mp
from transformers import pipeline

warnings.filterwarnings("ignore")


class ExtractorMediaPipe:

    def __init__(self, upscale=1):

        self.upscale = int(upscale)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # ========== Face Extraction ==========
        self.face_detector = mp.solutions.face_detection.FaceDetection(
            model_selection=0, min_detection_confidence=0.5
        )
        self.face_mesh = mp.solutions.face_mesh.FaceMesh(
            max_num_faces=1,
            static_image_mode=True,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )

        # ========== Eyes Extraction ==========
        self.RIGHT_EYE = [
            362, 382, 381, 380, 374, 373, 390, 249,
            263, 466, 388, 387, 386, 385, 384, 398,
        ]
        self.LEFT_EYE = [
            33, 7, 163, 144, 145, 153, 154, 155,
            133, 173, 157, 158, 159, 160, 161, 246,
        ]
        # https://huggingface.co/dima806/closed_eyes_image_detection
        # https://www.kaggle.com/code/dima806/closed-eye-image-detection-vit
        self.pipe = pipeline(
            "image-classification",
            model="dima806/closed_eyes_image_detection",
            device=self.device,
        )
        self.blink_lower_thresh = 0.22
        self.blink_upper_thresh = 0.25
        self.blink_confidence = 0.50

        # ========== Iris Extraction ==========
        self.RIGHT_IRIS = [474, 475, 476, 477]
        self.LEFT_IRIS = [469, 470, 471, 472]

    def extract_face(self, image):

        tmp_image = image.copy()
        results = self.face_detector.process(tmp_image)

        if not results.detections:
            # print("No face detected")
            return None
        else:
            bboxC = results.detections[0].location_data.relative_bounding_box
            ih, iw, _ = image.shape

            # Get bounding box coordinates
            x, y, w, h = (
                int(bboxC.xmin * iw),
                int(bboxC.ymin * ih),
                int(bboxC.width * iw),
                int(bboxC.height * ih),
            )

            # Calculate the center of the bounding box
            center_x = x + w // 2
            center_y = y + h // 2

            # Calculate new bounds ensuring they fit within the image dimensions
            half_size = 128 * self.upscale
            x1 = max(center_x - half_size, 0)
            y1 = max(center_y - half_size, 0)
            x2 = min(center_x + half_size, iw)
            y2 = min(center_y + half_size, ih)

            # Adjust x1, x2, y1, and y2 to ensure the cropped region is exactly (256 * self.upscale) x (256 * self.upscale)
            if x2 - x1 < (256 * self.upscale):
                if x1 == 0:
                    x2 = min((256 * self.upscale), iw)
                elif x2 == iw:
                    x1 = max(iw - (256 * self.upscale), 0)

            if y2 - y1 < (256 * self.upscale):
                if y1 == 0:
                    y2 = min((256 * self.upscale), ih)
                elif y2 == ih:
                    y1 = max(ih - (256 * self.upscale), 0)

            cropped_face = image[y1:y2, x1:x2]

            # bicubic upsampling
            # if self.upscale != 1:
            #     cropped_face = cv2.resize(
            #         cropped_face,
            #         (256 * self.upscale, 256 * self.upscale),
            #         interpolation=cv2.INTER_CUBIC,
            #     )

            return cropped_face

    @staticmethod
    def landmarksDetection(image, results, draw=False):
        image_height, image_width = image.shape[:2]
        mesh_coordinates = [
            (int(point.x * image_width), int(point.y * image_height))
            for point in results.multi_face_landmarks[0].landmark
        ]
        if draw:
            [cv2.circle(image, i, 2, (0, 255, 0), -1) for i in mesh_coordinates]
        return mesh_coordinates

    @staticmethod
    def euclideanDistance(point, point1):
        x, y = point
        x1, y1 = point1
        distance = sqrt((x1 - x) ** 2 + (y1 - y) ** 2)
        return distance

    def blinkRatio(self, landmarks, right_indices, left_indices):

        right_eye_landmark1 = landmarks[right_indices[0]]
        right_eye_landmark2 = landmarks[right_indices[8]]

        right_eye_landmark3 = landmarks[right_indices[12]]
        right_eye_landmark4 = landmarks[right_indices[4]]

        left_eye_landmark1 = landmarks[left_indices[0]]
        left_eye_landmark2 = landmarks[left_indices[8]]

        left_eye_landmark3 = landmarks[left_indices[12]]
        left_eye_landmark4 = landmarks[left_indices[4]]

        right_eye_horizontal_distance = self.euclideanDistance(
            right_eye_landmark1, right_eye_landmark2
        )
        right_eye_vertical_distance = self.euclideanDistance(
            right_eye_landmark3, right_eye_landmark4
        )

        left_eye_vertical_distance = self.euclideanDistance(
            left_eye_landmark3, left_eye_landmark4
        )
        left_eye_horizontal_distance = self.euclideanDistance(
            left_eye_landmark1, left_eye_landmark2
        )

        right_eye_ratio = right_eye_vertical_distance / right_eye_horizontal_distance
        left_eye_ratio = left_eye_vertical_distance / left_eye_horizontal_distance

        eyes_ratio = (right_eye_ratio + left_eye_ratio) / 2

        return eyes_ratio

    def extract_eyes_regions(self, image, landmarks, eye_indices):
        h, w, _ = image.shape
        points = [
            (int(landmarks[idx].x * w), int(landmarks[idx].y * h))
            for idx in eye_indices
        ]

        x_min = min([p[0] for p in points])
        x_max = max([p[0] for p in points])
        y_min = min([p[1] for p in points])
        y_max = max([p[1] for p in points])

        center_x = (x_min + x_max) // 2
        center_y = (y_min + y_max) // 2

        target_width = 32 * self.upscale
        target_height = 16 * self.upscale

        x1 = max(center_x - target_width // 2, 0)
        y1 = max(center_y - target_height // 2, 0)
        x2 = x1 + target_width
        y2 = y1 + target_height

        if x2 > w:
            x1 = w - target_width
            x2 = w
        if y2 > h:
            y1 = h - target_height
            y2 = h

        return image[y1:y2, x1:x2]

    def blink_detection_model(self, left_eye, right_eye):

        left_eye = cv2.cvtColor(left_eye, cv2.COLOR_RGB2GRAY)
        left_eye = Image.fromarray(left_eye)
        preds_left = self.pipe(left_eye)
        if preds_left[0]["label"] == "closeEye":
            closed_left = preds_left[0]["score"] >= self.blink_confidence
        else:
            closed_left = preds_left[1]["score"] >= self.blink_confidence

        right_eye = cv2.cvtColor(right_eye, cv2.COLOR_RGB2GRAY)
        right_eye = Image.fromarray(right_eye)
        preds_right = self.pipe(right_eye)
        if preds_right[0]["label"] == "closeEye":
            closed_right = preds_right[0]["score"] >= self.blink_confidence
        else:
            closed_right = preds_right[1]["score"] >= self.blink_confidence

        # print("preds_left = ", preds_left)
        # print("preds_right = ", preds_right)

        return closed_left or closed_right

    def extract_eyes(self, image, blink_detection=False):

        tmp_face = image.copy()
        results = self.face_mesh.process(tmp_face)

        if results.multi_face_landmarks is None:
            return None

        face_landmarks = results.multi_face_landmarks[0].landmark

        left_eye = self.extract_eyes_regions(image, face_landmarks, self.LEFT_EYE)
        right_eye = self.extract_eyes_regions(image, face_landmarks, self.RIGHT_EYE)
        blinked = False

        if blink_detection:
            mesh_coordinates = self.landmarksDetection(image, results, False)
            eyes_ratio = self.blinkRatio(
                mesh_coordinates, self.RIGHT_EYE, self.LEFT_EYE
            )
            if (
                eyes_ratio > self.blink_lower_thresh
                and eyes_ratio <= self.blink_upper_thresh
            ):
                # print(
                #     "I think person blinked. eyes_ratio = ",
                #     eyes_ratio,
                #     "Confirming with ViT model...",
                # )
                blinked = self.blink_detection_model(
                    left_eye=left_eye, right_eye=right_eye
                )
                # if blinked:
                #     print("Yes, person blinked. Confirmed by model")
                # else:
                #     print("No, person didn't blinked. False Alarm")
            elif eyes_ratio <= self.blink_lower_thresh:
                blinked = True
                # print("Surely person blinked. eyes_ratio = ", eyes_ratio)
            else:
                blinked = False

        return {"left_eye": left_eye, "right_eye": right_eye, "blinked": blinked}

    @staticmethod
    def segment_iris(iris_img):

        # Convert RGB image to grayscale
        iris_img_gray = cv2.cvtColor(iris_img, cv2.COLOR_RGB2GRAY)

        # Apply Gaussian blur for denoising
        iris_img_blur = cv2.GaussianBlur(iris_img_gray, (5, 5), 0)

        # Perform adaptive thresholding
        _, iris_img_mask = cv2.threshold(
            iris_img_blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )

        # Invert the mask
        segmented_mask = cv2.bitwise_not(iris_img_mask)
        segmented_mask = cv2.cvtColor(segmented_mask, cv2.COLOR_GRAY2RGB)
        segmented_iris = cv2.bitwise_and(iris_img, segmented_mask)

        return {
            "segmented_iris": segmented_iris,
            "segmented_mask": segmented_mask,
        }

    def extract_iris(self, image):

        ih, iw, _ = image.shape
        tmp_face = image.copy()
        results = self.face_mesh.process(tmp_face)

        if results.multi_face_landmarks is None:
            return None

        mesh_coordinates = self.landmarksDetection(image, results, False)
        mesh_points = np.array(mesh_coordinates)

        (l_cx, l_cy), l_radius = cv2.minEnclosingCircle(mesh_points[self.LEFT_IRIS])
        (r_cx, r_cy), r_radius = cv2.minEnclosingCircle(mesh_points[self.RIGHT_IRIS])

        # Crop the left iris to be exactly 16*upscaled x 16*upscaled
        l_x1 = max(int(l_cx) - (8 * self.upscale), 0)
        l_y1 = max(int(l_cy) - (8 * self.upscale), 0)
        l_x2 = min(int(l_cx) + (8 * self.upscale), iw)
        l_y2 = min(int(l_cy) + (8 * self.upscale), ih)

        cropped_left_iris = image[l_y1:l_y2, l_x1:l_x2]

        left_iris_segmented_data = self.segment_iris(
            cv2.cvtColor(cropped_left_iris, cv2.COLOR_BGR2RGB)
        )

        # Crop the right iris to be exactly 16*upscaled x 16*upscaled
        r_x1 = max(int(r_cx) - (8 * self.upscale), 0)
        r_y1 = max(int(r_cy) - (8 * self.upscale), 0)
        r_x2 = min(int(r_cx) + (8 * self.upscale), iw)
        r_y2 = min(int(r_cy) + (8 * self.upscale), ih)

        cropped_right_iris = image[r_y1:r_y2, r_x1:r_x2]

        right_iris_segmented_data = self.segment_iris(
            cv2.cvtColor(cropped_right_iris, cv2.COLOR_BGR2RGB)
        )

        return {
            "left_iris": {
                "img": cropped_left_iris,
                "segmented_iris": left_iris_segmented_data["segmented_iris"],
                "segmented_mask": left_iris_segmented_data["segmented_mask"],
            },
            "right_iris": {
                "img": cropped_right_iris,
                "segmented_iris": right_iris_segmented_data["segmented_iris"],
                "segmented_mask": right_iris_segmented_data["segmented_mask"],
            },
        }
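A rough usage sketch for this extractor, mirroring the order in which FeaturesExtractor (next file) calls it; the input path and the BGR-to-RGB conversion are assumptions for illustration, not part of the commit.

# Hypothetical usage of ExtractorMediaPipe on a single image.
import cv2
from feature_extraction.extractor_mediapipe import ExtractorMediaPipe

extractor = ExtractorMediaPipe(upscale=1)
frame = cv2.cvtColor(cv2.imread("face.jpg"), cv2.COLOR_BGR2RGB)  # "face.jpg" is a placeholder

face = extractor.extract_face(frame)        # roughly 256x256 face crop, or None if no face
if face is not None:
    eyes = extractor.extract_eyes(frame, blink_detection=True)  # 32x16 eye crops + "blinked" flag
    iris = extractor.extract_iris(frame)                        # 16x16 iris crops + masks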
feature_extraction/features_extractor.py
ADDED
@@ -0,0 +1,50 @@
import os
import sys
import warnings
import os.path as osp

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
sys.path.append(root_path)

from feature_extraction.extractor_mediapipe import ExtractorMediaPipe

warnings.filterwarnings("ignore")


class FeaturesExtractor:

    def __init__(
        self, extraction_library="mediapipe", blink_detection=False, upscale=1
    ):
        self.upscale = upscale
        self.blink_detection = blink_detection
        self.extraction_library = extraction_library
        self.feature_extractor = ExtractorMediaPipe(self.upscale)

    def __call__(self, image):
        results = {}
        face = self.feature_extractor.extract_face(image)
        if face is None:
            # print("No face found. Skipped feature extraction!")
            return None
        else:
            results["img"] = image
            results["face"] = face
            eyes_data = self.feature_extractor.extract_eyes(image, self.blink_detection)
            if eyes_data is None:
                # print("No eyes found. Skipped feature extraction!")
                return results
            else:
                results["eyes"] = eyes_data
                if eyes_data["blinked"]:
                    # print("Found blinked eyes!")
                    return results
                else:
                    iris_data = self.feature_extractor.extract_iris(image)
                    if iris_data is None:
                        # print("No iris found. Skipped feature extraction!")
                        return results
                    else:
                        results["iris"] = iris_data
                        return results
packages.txt
DELETED
File without changes
preprocessing/dataset_creation.py
ADDED
@@ -0,0 +1,40 @@
import sys
import cv2
import os.path as osp

root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
sys.path.append(root_path)

from preprocessing.dataset_creation_utils import get_sr_method
from feature_extraction.features_extractor import FeaturesExtractor


class EyeDentityDatasetCreation:

    def __init__(self, feature_extraction_configs, sr_configs=None):
        self.extraction_library = feature_extraction_configs["extraction_library"]
        self.sr_configs = sr_configs
        if self.sr_configs:
            self.sr_method_name = sr_configs["method"]
            self.upscale = sr_configs["params"]["upscale"]
            if self.sr_method_name != "-":
                self.sr_method = get_sr_method(self, sr_configs)
        else:
            self.upscale = 1

        self.blink_detection = feature_extraction_configs["blink_detection"]
        self.features_extractor = FeaturesExtractor(
            extraction_library=self.extraction_library,
            blink_detection=self.blink_detection,
            upscale=self.upscale,
        )

    def __call__(self, img):
        # img = cv2.imread(img)
        if self.sr_configs is None or self.sr_configs != "-":
            img = img
        else:
            img = self.sr_method(img)

        result_dict = self.features_extractor(img)
        return result_dict
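A short sketch of how this pipeline is instantiated without super-resolution, matching the configuration app.py builds; the image path is a placeholder used only for illustration.

# Hypothetical call of EyeDentityDatasetCreation, mirroring app.py's configuration.
import numpy as np
from PIL import Image
from preprocessing.dataset_creation import EyeDentityDatasetCreation

ds_pipeline = EyeDentityDatasetCreation(
    feature_extraction_configs={
        "blink_detection": False,
        "upscale": 1,
        "extraction_library": "mediapipe",
    },
    sr_configs=None,
)
img = np.array(Image.open("face.jpg").convert("RGB"))  # "face.jpg" is a placeholder
results = ds_pipeline(img)  # None, or a dict with "img", "face", "eyes", "iris" keys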
preprocessing/dataset_creation_utils.py
ADDED
@@ -0,0 +1,29 @@
import os
import torch
import random
import numpy as np
from SR_Inference.inference_hat import HAT
from SR_Inference.inference_gfpgan import GFPGAN
from SR_Inference.inference_realesr import RealEsr
from SR_Inference.inference_srresnet import SRResNet
from SR_Inference.inference_codeformer import CodeFormer


def seed_everything(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True


def get_sr_method(self, sr_configs):
    sr_method_class = globals().get(self.sr_method_name)
    if sr_method_class is not None:
        return sr_method_class(**sr_configs["params"])
    else:
        raise Exception(
            f"No such SR method called '{self.sr_method_name}' implemented!"
        )
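get_sr_method resolves the configured name against this module's globals(), so the "method" string has to match one of the imported classes (HAT, GFPGAN, RealEsr, SRResNet, CodeFormer). A hedged sketch of that resolution follows; the "RealEsr" choice and the upscale value are assumptions, and the caller object is faked with a SimpleNamespace.

# Hypothetical resolution of an SR method from a config dict.
from types import SimpleNamespace
from preprocessing.dataset_creation_utils import get_sr_method

sr_configs = {"method": "RealEsr", "params": {"upscale": 2}}          # assumed config values
holder = SimpleNamespace(sr_method_name=sr_configs["method"])         # stand-in for the caller
sr_method = get_sr_method(holder, sr_configs)                         # instantiates RealEsr(upscale=2)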
registrations/models.py
CHANGED
@@ -121,4 +121,4 @@ class ResNet50(nn.Module):
         return x
 
 
-print("Registered models in MODEL_REGISTRY:", MODEL_REGISTRY.keys())
+# print("Registered models in MODEL_REGISTRY:", MODEL_REGISTRY.keys())
registry_utils.py
CHANGED
@@ -58,22 +58,22 @@ def import_registered_modules(registration_folder="registrations"):
         list: List of imported modules.
     """
 
-    print("\n")
+    # print("\n")
 
     registration_modules_folder = (
        osp.dirname(osp.abspath(__file__)) + f"/{registration_folder}"
     )
-    print("registration_modules_folder = ", registration_modules_folder)
+    # print("registration_modules_folder = ", registration_modules_folder)
 
     registration_modules_file_names = [
        osp.splitext(osp.basename(v))[0]
        for v in scandir(dir_path=registration_modules_folder)
     ]
-    print("registration_modules_file_names = ", registration_modules_file_names)
+    # print("registration_modules_file_names = ", registration_modules_file_names)
 
     imported_modules = [
        importlib.import_module(f"{registration_folder}.{file_name}")
        for file_name in registration_modules_file_names
     ]
-    print("imported_modules = ", imported_modules)
-    print("\n")
+    # print("imported_modules = ", imported_modules)
+    # print("\n")
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+# https://huggingface.co/docs/hub/en/spaces-dependencies
 tqdm
 PyYAML
 numpy
@@ -10,7 +11,7 @@ scikit_learn
 torch
 captum
 evaluate
-
+basicsr
 facexlib
 realesrgan
 opencv_python
@@ -18,7 +19,7 @@ cmake
 dlib
 einops
 transformers
-
+gfpgan
 # streamlit
 mediapipe
 imutils
xx-packages.txt
ADDED
@@ -0,0 +1,28 @@
# https://huggingface.co/docs/hub/en/spaces-dependencies
# tqdm
# PyYAML
# numpy
# pandas
# matplotlib
# seaborn
# mlflow
# pillow
# scikit_learn
# torch
# captum
# evaluate
# basicsr
# facexlib
# realesrgan
# opencv_python
# cmake
# dlib
# einops
# transformers
# gfpgan
# streamlit
# mediapipe
# imutils
# scipy
# torchvision==0.16.0
# torchcam