import torch
import torch.nn as nn
from torch import Tensor
import torch.nn.init as init
import torch.nn.functional as F

EPS = 1e-8

class GlobalLayerNorm(nn.Module):
    """Calculate Global Layer Normalization.

    Arguments
    ---------
    dim : int or list or torch.Size
        Size of the channel dimension, used for the affine parameters.
    shape : int
        Number of dimensions of the expected input (3 or 4).
    eps : float
        A value added to the denominator for numerical stability.
    elementwise_affine : bool
        When set to True, this module has learnable per-element affine
        parameters initialized to ones (for weights) and zeros (for biases).

    Example
    -------
    >>> x = torch.randn(5, 10, 20)
    >>> GLN = GlobalLayerNorm(10, 3)
    >>> x_norm = GLN(x)
    """

    def __init__(self, dim, shape, eps=1e-8, elementwise_affine=True):
        super(GlobalLayerNorm, self).__init__()
        self.dim = dim
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if self.elementwise_affine:
            if shape == 3:
                self.weight = nn.Parameter(torch.ones(self.dim, 1))
                self.bias = nn.Parameter(torch.zeros(self.dim, 1))
            if shape == 4:
                self.weight = nn.Parameter(torch.ones(self.dim, 1, 1))
                self.bias = nn.Parameter(torch.zeros(self.dim, 1, 1))
        else:
            self.register_parameter("weight", None)
            self.register_parameter("bias", None)
    def forward(self, x):
        """Returns the normalized tensor.

        Arguments
        ---------
        x : torch.Tensor
            Tensor of size [N, C, K, S] or [N, C, L].
        """
        # x = N x C x K x S or N x C x L
        # cln: mean, var of shape N x 1 x K x S
        # gln: mean, var of shape N x 1 x 1 (x 1)
        if x.dim() == 3:
            mean = torch.mean(x, (1, 2), keepdim=True)
            var = torch.mean((x - mean) ** 2, (1, 2), keepdim=True)
            if self.elementwise_affine:
                x = (
                    self.weight * (x - mean) / torch.sqrt(var + self.eps)
                    + self.bias
                )
            else:
                x = (x - mean) / torch.sqrt(var + self.eps)
        if x.dim() == 4:
            mean = torch.mean(x, (1, 2, 3), keepdim=True)
            var = torch.mean((x - mean) ** 2, (1, 2, 3), keepdim=True)
            if self.elementwise_affine:
                x = (
                    self.weight * (x - mean) / torch.sqrt(var + self.eps)
                    + self.bias
                )
            else:
                x = (x - mean) / torch.sqrt(var + self.eps)
        return x

class CumulativeLayerNorm(nn.LayerNorm):
    """Calculate Cumulative Layer Normalization.

    Arguments
    ---------
    dim : int
        Dimension that you want to normalize.
    elementwise_affine : bool
        Whether to use learnable per-element affine parameters.

    Example
    -------
    >>> x = torch.randn(5, 10, 20)
    >>> CLN = CumulativeLayerNorm(10)
    >>> x_norm = CLN(x)
    """

    def __init__(self, dim, elementwise_affine=True):
        super(CumulativeLayerNorm, self).__init__(
            dim, elementwise_affine=elementwise_affine, eps=1e-8
        )
    def forward(self, x):
        """Returns the normalized tensor.

        Arguments
        ---------
        x : torch.Tensor
            Tensor of size [N, C, K, S] or [N, C, L].
        """
        # x: N x C x K x S or N x C x L
        if x.dim() == 4:
            # N x K x S x C -> normalize over the channel dimension only
            x = x.permute(0, 2, 3, 1).contiguous()
            x = super().forward(x)
            # back to N x C x K x S
            x = x.permute(0, 3, 1, 2).contiguous()
        if x.dim() == 3:
            # N x L x C -> normalize over the channel dimension only
            x = torch.transpose(x, 1, 2)
            x = super().forward(x)
            # back to N x C x L
            x = torch.transpose(x, 1, 2)
        return x

def select_norm(norm, dim, shape):
    """Just a wrapper to select the normalization type."""
    if norm == "gln":
        return GlobalLayerNorm(dim, shape, elementwise_affine=True)
    if norm == "cln":
        return CumulativeLayerNorm(dim, elementwise_affine=True)
    if norm == "ln":
        return nn.GroupNorm(1, dim, eps=1e-8)
    else:
        return nn.BatchNorm1d(dim)

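# Usage sketch (illustrative, not part of the original source): every returned
# module normalizes a [N, C, L] tensor along its channel dimension C.
# >>> norm = select_norm("gln", dim=64, shape=3)
# >>> x = torch.randn(2, 64, 100)
# >>> norm(x).shape
# torch.Size([2, 64, 100])
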
class Swish(nn.Module):
    """
    Swish is a smooth, non-monotonic activation that consistently matches or
    outperforms ReLU on deep networks across a variety of challenging domains
    such as image classification and machine translation.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs * inputs.sigmoid()

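# Quick sketch (illustrative, not part of the original source): Swish computes
# x * sigmoid(x), approaching the identity for large positive inputs while
# smoothly gating small and negative ones.
# >>> act = Swish()
# >>> act(torch.tensor([-1.0, 0.0, 1.0]))
# tensor([-0.2689,  0.0000,  0.7311])
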
class GLU(nn.Module):
    """
    The gating mechanism is called Gated Linear Units (GLU); it was first
    introduced for natural language processing in the paper
    "Language Modeling with Gated Convolutional Networks".
    """

    def __init__(self, dim: int) -> None:
        super(GLU, self).__init__()
        self.dim = dim

    def forward(self, inputs: Tensor) -> Tensor:
        outputs, gate = inputs.chunk(2, dim=self.dim)
        return outputs * gate.sigmoid()

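# Usage sketch (illustrative, not part of the original source): GLU halves the
# chosen dimension, using one half to gate the other.
# >>> glu = GLU(dim=1)
# >>> x = torch.randn(4, 128, 50)   # split into two 64-channel halves
# >>> glu(x).shape
# torch.Size([4, 64, 50])
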
class Transpose(nn.Module):
    """Wrapper class of torch.transpose() for Sequential module."""

    def __init__(self, shape: tuple):
        super(Transpose, self).__init__()
        self.shape = shape

    def forward(self, x: Tensor) -> Tensor:
        return x.transpose(*self.shape)

class Linear(nn.Module):
    """
    Wrapper class of torch.nn.Linear.
    Weights are initialized with Xavier uniform initialization and biases are
    initialized to zeros.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        super(Linear, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)
        init.xavier_uniform_(self.linear.weight)
        if bias:
            init.zeros_(self.linear.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.linear(x)

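# Usage sketch (illustrative, not part of the original source): behaves like
# nn.Linear, but with Xavier-uniform weights and zero biases at construction.
# >>> proj = Linear(64, 128)
# >>> proj(torch.randn(10, 64)).shape
# torch.Size([10, 128])
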
class DepthwiseConv1d(nn.Module):
    """
    When groups == in_channels and out_channels == K * in_channels, where K is
    a positive integer, this operation is termed in the literature as a
    depthwise convolution.

    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        bias (bool, optional): If True, adds a learnable bias to the output. Default: False

    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing the input sequence

    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produced by the depthwise 1-D convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = False,
    ) -> None:
        super(DepthwiseConv1d, self).__init__()
        assert out_channels % in_channels == 0, "out_channels should be a constant multiple of in_channels"
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            groups=in_channels,
            stride=stride,
            padding=padding,
            bias=bias,
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return self.conv(inputs)

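# Usage sketch (illustrative, not part of the original source): each input
# channel is convolved independently (groups == in_channels); with 'SAME'
# padding the time length is preserved.
# >>> dw = DepthwiseConv1d(in_channels=16, out_channels=16, kernel_size=3, padding=1)
# >>> dw(torch.randn(4, 16, 100)).shape
# torch.Size([4, 16, 100])
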
class PointwiseConv1d(nn.Module):
    """
    When kernel_size == 1, a conv1d is termed in the literature as a pointwise
    convolution. This operation is often used to match dimensions.

    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        bias (bool, optional): If True, adds a learnable bias to the output. Default: True

    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing the input sequence

    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produced by the pointwise 1-D convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = True,
    ) -> None:
        super(PointwiseConv1d, self).__init__()
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=stride,
            padding=padding,
            bias=bias,
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return self.conv(inputs)

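# Usage sketch (illustrative, not part of the original source): a pointwise
# convolution mixes channels per time step, e.g. projecting 16 -> 32 channels.
# >>> pw = PointwiseConv1d(in_channels=16, out_channels=32)
# >>> pw(torch.randn(4, 16, 100)).shape
# torch.Size([4, 32, 100])
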
class ConvModule(nn.Module):
    """
    Simplified variant of the Conformer convolution module: a single 1-D
    depthwise convolution with 'SAME' padding, wrapped in a residual
    connection.

    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 17
        expansion_factor (int, optional): Expansion factor; only 2 is supported. Default: 2
        dropout_p (float, optional): Probability of dropout (currently unused). Default: 0.1

    Inputs: inputs
        inputs (batch, time, dim): Tensor containing the input sequence

    Outputs: outputs
        outputs (batch, time, dim): Tensor produced by the convolution module.
    """

    def __init__(
        self,
        in_channels: int,
        kernel_size: int = 17,
        expansion_factor: int = 2,
        dropout_p: float = 0.1,
    ) -> None:
        super(ConvModule, self).__init__()
        assert (kernel_size - 1) % 2 == 0, "kernel_size should be an odd number for 'SAME' padding"
        assert expansion_factor == 2, "Currently, only supports expansion_factor 2"
        self.sequential = nn.Sequential(
            Transpose(shape=(1, 2)),
            DepthwiseConv1d(in_channels, in_channels, kernel_size, stride=1, padding=(kernel_size - 1) // 2),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs + self.sequential(inputs).transpose(1, 2)

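# Usage sketch (illustrative, not part of the original source): the module
# operates on (batch, time, dim) tensors and preserves their shape, since the
# depthwise convolution uses 'SAME' padding and a residual connection.
# >>> conv = ConvModule(in_channels=64, kernel_size=17)
# >>> conv(torch.randn(4, 100, 64)).shape
# torch.Size([4, 100, 64])
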
class ConvModule_Dilated(nn.Module):
    """
    Same structure as ConvModule: a single 1-D depthwise convolution with
    'SAME' padding, wrapped in a residual connection.

    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 17
        expansion_factor (int, optional): Expansion factor; only 2 is supported. Default: 2
        dropout_p (float, optional): Probability of dropout (currently unused). Default: 0.1

    Inputs: inputs
        inputs (batch, time, dim): Tensor containing the input sequence

    Outputs: outputs
        outputs (batch, time, dim): Tensor produced by the convolution module.
    """

    def __init__(
        self,
        in_channels: int,
        kernel_size: int = 17,
        expansion_factor: int = 2,
        dropout_p: float = 0.1,
    ) -> None:
        super(ConvModule_Dilated, self).__init__()
        assert (kernel_size - 1) % 2 == 0, "kernel_size should be an odd number for 'SAME' padding"
        assert expansion_factor == 2, "Currently, only supports expansion_factor 2"
        self.sequential = nn.Sequential(
            Transpose(shape=(1, 2)),
            DepthwiseConv1d(in_channels, in_channels, kernel_size, stride=1, padding=(kernel_size - 1) // 2),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs + self.sequential(inputs).transpose(1, 2)

class DilatedDenseNet(nn.Module):
    """
    Densely connected stack of grouped 2-D convolutions over the time axis with
    exponentially increasing dilation (1, 2, 4, ...). Each layer receives the
    concatenation of all previous layer outputs and the input; symmetric
    zero-padding keeps the time length unchanged.
    """

    def __init__(self, depth=4, lorder=20, in_channels=64):
        super(DilatedDenseNet, self).__init__()
        self.depth = depth
        self.in_channels = in_channels
        self.pad = nn.ConstantPad2d((1, 1, 1, 0), value=0.0)
        self.twidth = lorder * 2 - 1
        self.kernel_size = (self.twidth, 1)
        for i in range(self.depth):
            dil = 2 ** i
            pad_length = lorder + (dil - 1) * (lorder - 1) - 1
            setattr(self, 'pad{}'.format(i + 1), nn.ConstantPad2d((0, 0, pad_length, pad_length), value=0.0))
            setattr(self, 'conv{}'.format(i + 1),
                    nn.Conv2d(self.in_channels * (i + 1), self.in_channels, kernel_size=self.kernel_size,
                              dilation=(dil, 1), groups=self.in_channels, bias=False))
            setattr(self, 'norm{}'.format(i + 1), nn.InstanceNorm2d(in_channels, affine=True))
            setattr(self, 'prelu{}'.format(i + 1), nn.PReLU(self.in_channels))

    def forward(self, x):
        # x: (batch, time, channels) -> (batch, 1, time, channels)
        x = torch.unsqueeze(x, 1)
        # -> (batch, channels, time, 1) so Conv2d runs along the time axis
        x_per = x.permute(0, 3, 2, 1)
        skip = x_per
        for i in range(self.depth):
            out = getattr(self, 'pad{}'.format(i + 1))(skip)
            out = getattr(self, 'conv{}'.format(i + 1))(out)
            out = getattr(self, 'norm{}'.format(i + 1))(out)
            out = getattr(self, 'prelu{}'.format(i + 1))(out)
            # dense connection: concatenate along the channel dimension
            skip = torch.cat([out, skip], dim=1)
        # back to (batch, time, channels)
        out1 = out.permute(0, 3, 2, 1)
        return out1.squeeze(1)

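# Usage sketch (illustrative, not part of the original source): the dense
# dilated stack preserves the (batch, time, channels) shape of its input.
# >>> net = DilatedDenseNet(depth=4, lorder=20, in_channels=64)
# >>> net(torch.randn(2, 100, 64)).shape
# torch.Size([2, 100, 64])
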
class FFConvM_Dilated(nn.Module):
    """
    Feed-forward convolutional block: layer normalization, a linear projection
    with SiLU activation, a DilatedDenseNet over the time axis, and dropout.
    Operates on (batch, time, dim_in) tensors and returns (batch, time, dim_out).
    """

    def __init__(
        self,
        dim_in,
        dim_out,
        norm_klass=nn.LayerNorm,
        dropout=0.1,
    ):
        super().__init__()
        self.mdl = nn.Sequential(
            norm_klass(dim_in),
            nn.Linear(dim_in, dim_out),
            nn.SiLU(),
            DilatedDenseNet(depth=2, lorder=17, in_channels=dim_out),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        output = self.mdl(x)
        return output

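# Usage sketch (illustrative, not part of the original source):
# >>> ff = FFConvM_Dilated(dim_in=64, dim_out=64)
# >>> ff(torch.randn(2, 100, 64)).shape
# torch.Size([2, 100, 64])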