import torch
import torch.nn as nn
from torch import Tensor
import torch.nn.init as init
import torch.nn.functional as F
class UniDeepFsmn(nn.Module):
    """Deep FSMN block: a pre-norm feed-forward layer followed by a depthwise
    convolutional memory over the sequence axis, with a residual connection
    back to the input. Note that with rorder set equal to lorder the memory
    spans both past and future frames.
    """

    def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None, dropout_p=0.1):
        super(UniDeepFsmn, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        if lorder is None:
            # Without a memory order, the block is left unconfigured.
            return
        self.lorder = lorder
        self.rorder = lorder
        self.hidden_size = hidden_size
        self.linear = nn.Linear(input_dim, hidden_size)
        self.project = nn.Linear(hidden_size, output_dim, bias=False)
        # Depthwise Conv2d over the sequence axis implements the FSMN memory:
        # the kernel covers lorder-1 past frames, the current frame, and
        # rorder-1 future frames.
        self.conv1 = nn.Conv2d(
            input_dim, output_dim, [self.lorder + self.rorder - 1, 1], [1, 1],
            groups=input_dim, bias=False,
        )
        self.norm = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(p=dropout_p)  # kept for interface parity; unused in forward
        self.swish = Swish()

    def forward(self, input):
        # input: batch (b) x sequence (T) x feature (h)
        f1 = self.swish(self.linear(self.norm(input)))
        p1 = self.project(f1)
        x = torch.unsqueeze(p1, 1)
        # x: batch (b) x channel (c=1) x sequence (T) x feature (h)
        x_per = x.permute(0, 3, 2, 1)
        # x_per: batch (b) x feature (h) x sequence (T) x channel (c=1)
        # Pad the sequence axis on both sides so the convolution preserves T.
        y = F.pad(x_per, [0, 0, self.lorder - 1, self.rorder - 1])
        out = x_per + self.conv1(y)
        out1 = out.permute(0, 3, 2, 1)
        # out1: batch (b) x channel (c=1) x sequence (T) x feature (h)
        # Squeeze only the channel dim so a batch of size 1 is not collapsed.
        return input + out1.squeeze(1)
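
# --- Illustrative usage (assumed example, not from the original file) ---
# A minimal shape check for UniDeepFsmn; the dimensions are arbitrary and
# chosen only to show that the residual design preserves (B, T, H).
#
# >>> fsmn = UniDeepFsmn(input_dim=64, output_dim=64, lorder=5, hidden_size=128)
# >>> x = torch.randn(2, 100, 64)   # (batch, sequence, feature)
# >>> fsmn(x).shape
# torch.Size([2, 100, 64])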
class GlobalLayerNorm(nn.Module):
    """Calculate Global Layer Normalization.

    Arguments
    ---------
    dim : int
        Number of channels (size of the normalized channel dimension).
    shape : int
        Number of dimensions of the expected input (3 or 4).
    eps : float
        A value added to the denominator for numerical stability.
    elementwise_affine : bool
        A boolean value that when set to True,
        this module has learnable per-element affine parameters
        initialized to ones (for weights) and zeros (for biases).

    Example
    -------
    >>> x = torch.randn(5, 10, 20)
    >>> GLN = GlobalLayerNorm(10, 3)
    >>> x_norm = GLN(x)
    """

    def __init__(self, dim, shape, eps=1e-8, elementwise_affine=True):
        super(GlobalLayerNorm, self).__init__()
        self.dim = dim
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if self.elementwise_affine:
            if shape == 3:
                self.weight = nn.Parameter(torch.ones(self.dim, 1))
                self.bias = nn.Parameter(torch.zeros(self.dim, 1))
            elif shape == 4:
                self.weight = nn.Parameter(torch.ones(self.dim, 1, 1))
                self.bias = nn.Parameter(torch.zeros(self.dim, 1, 1))
        else:
            self.register_parameter("weight", None)
            self.register_parameter("bias", None)

    def forward(self, x):
        """Returns the normalized tensor.

        Arguments
        ---------
        x : torch.Tensor
            Tensor of size [N, C, K, S] or [N, C, L].
        """
        # Global LN normalizes over all non-batch dims, so mean/var have
        # shape N x 1 x 1 (3-D input) or N x 1 x 1 x 1 (4-D input).
        if x.dim() == 3:
            mean = torch.mean(x, (1, 2), keepdim=True)
            var = torch.mean((x - mean) ** 2, (1, 2), keepdim=True)
            if self.elementwise_affine:
                x = (
                    self.weight * (x - mean) / torch.sqrt(var + self.eps)
                    + self.bias
                )
            else:
                x = (x - mean) / torch.sqrt(var + self.eps)
        elif x.dim() == 4:
            mean = torch.mean(x, (1, 2, 3), keepdim=True)
            var = torch.mean((x - mean) ** 2, (1, 2, 3), keepdim=True)
            if self.elementwise_affine:
                x = (
                    self.weight * (x - mean) / torch.sqrt(var + self.eps)
                    + self.bias
                )
            else:
                x = (x - mean) / torch.sqrt(var + self.eps)
        return x
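
# --- Illustrative usage (assumed example, not from the original file) ---
# GlobalLayerNorm normalizes over every non-batch dimension; the affine
# parameters are per-channel. Shapes below are arbitrary.
#
# >>> gln3 = GlobalLayerNorm(10, 3)
# >>> gln3(torch.randn(5, 10, 20)).shape
# torch.Size([5, 10, 20])
# >>> gln4 = GlobalLayerNorm(10, 4)
# >>> gln4(torch.randn(5, 10, 8, 20)).shape
# torch.Size([5, 10, 8, 20])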
class CumulativeLayerNorm(nn.LayerNorm):
    """Calculate Cumulative Layer Normalization.

    Applies layer normalization over the channel dimension only, by moving
    the channel axis last before calling ``nn.LayerNorm``.

    Arguments
    ---------
    dim : int
        Dimension that you want to normalize.
    elementwise_affine : bool
        If True, uses learnable per-element affine parameters.

    Example
    -------
    >>> x = torch.randn(5, 10, 20)
    >>> CLN = CumulativeLayerNorm(10)
    >>> x_norm = CLN(x)
    """

    def __init__(self, dim, elementwise_affine=True):
        super(CumulativeLayerNorm, self).__init__(
            dim, elementwise_affine=elementwise_affine, eps=1e-8
        )

    def forward(self, x):
        """Returns the normalized tensor.

        Arguments
        ---------
        x : torch.Tensor
            Tensor of size [N, C, K, S] or [N, C, L].
        """
        if x.dim() == 4:
            # N x C x K x S -> N x K x S x C (channel-only norm), then back.
            x = x.permute(0, 2, 3, 1).contiguous()
            x = super().forward(x)
            x = x.permute(0, 3, 1, 2).contiguous()
        elif x.dim() == 3:
            # N x C x L -> N x L x C (channel-only norm), then back.
            x = torch.transpose(x, 1, 2)
            x = super().forward(x)
            x = torch.transpose(x, 1, 2)
        return x
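
# --- Illustrative usage (assumed example, not from the original file) ---
# Channel-wise normalization for both supported layouts; shapes are arbitrary.
#
# >>> cln = CumulativeLayerNorm(10)
# >>> cln(torch.randn(5, 10, 20)).shape       # N x C x L
# torch.Size([5, 10, 20])
# >>> cln(torch.randn(5, 10, 8, 20)).shape    # N x C x K x S
# torch.Size([5, 10, 8, 20])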
def select_norm(norm, dim, shape):
    """Just a wrapper to select the normalization type.

    'gln' -> GlobalLayerNorm, 'cln' -> CumulativeLayerNorm,
    'ln' -> GroupNorm with a single group (layer norm over channels);
    anything else falls through to BatchNorm1d.
    """
    if norm == "gln":
        return GlobalLayerNorm(dim, shape, elementwise_affine=True)
    elif norm == "cln":
        return CumulativeLayerNorm(dim, elementwise_affine=True)
    elif norm == "ln":
        return nn.GroupNorm(1, dim, eps=1e-8)
    else:
        return nn.BatchNorm1d(dim)
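
# --- Illustrative usage (assumed example, not from the original file) ---
# >>> select_norm("gln", 64, 3)   # GlobalLayerNorm over all non-batch dims
# >>> select_norm("ln", 64, 3)    # GroupNorm(1, 64): layer norm over channels
# >>> select_norm("bn", 64, 3)    # falls through to nn.BatchNorm1d(64)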
class Swish(nn.Module):
    """
    Swish, x * sigmoid(x), is a smooth, non-monotonic activation function that consistently
    matches or outperforms ReLU on deep networks across a variety of challenging domains
    such as image classification and machine translation.
    """

    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs * inputs.sigmoid()
class GLU(nn.Module):
    """
    The gating mechanism is called Gated Linear Units (GLU), first introduced for natural
    language processing in the paper "Language Modeling with Gated Convolutional Networks".
    The input is split in half along ``dim``; one half gates the other through a sigmoid.
    """

    def __init__(self, dim: int) -> None:
        super(GLU, self).__init__()
        self.dim = dim

    def forward(self, inputs: Tensor) -> Tensor:
        outputs, gate = inputs.chunk(2, dim=self.dim)
        return outputs * gate.sigmoid()
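
# --- Illustrative usage (assumed example, not from the original file) ---
# GLU halves the chosen dimension: (N, 2C, L) -> (N, C, L).
#
# >>> glu = GLU(dim=1)
# >>> glu(torch.randn(4, 128, 50)).shape
# torch.Size([4, 64, 50])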
class Transpose(nn.Module):
    """ Wrapper class of torch.transpose() for Sequential module. """

    def __init__(self, shape: tuple):
        super(Transpose, self).__init__()
        self.shape = shape

    def forward(self, x: Tensor) -> Tensor:
        return x.transpose(*self.shape)
class Linear(nn.Module):
    """
    Wrapper class of torch.nn.Linear.
    Weights are initialized with Xavier uniform initialization and biases with zeros.
    """

    def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
        super(Linear, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=bias)
        init.xavier_uniform_(self.linear.weight)
        if bias:
            init.zeros_(self.linear.bias)

    def forward(self, x: Tensor) -> Tensor:
        return self.linear(x)
class DepthwiseConv1d(nn.Module):
    """
    When groups == in_channels and out_channels == K * in_channels, where K is a positive integer,
    this operation is termed in the literature as depthwise convolution.

    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        bias (bool, optional): If True, adds a learnable bias to the output. Default: False

    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing input vector

    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produced by depthwise 1-D convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = False,
    ) -> None:
        super(DepthwiseConv1d, self).__init__()
        assert out_channels % in_channels == 0, "out_channels should be a constant multiple of in_channels"
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            groups=in_channels,
            stride=stride,
            padding=padding,
            bias=bias,
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return self.conv(inputs)
class DepthwiseConv2d(nn.Module):
    """
    When groups == in_channels and out_channels == K * in_channels, where K is a positive integer,
    this operation is termed in the literature as depthwise convolution. This variant applies the
    depthwise kernel along the time axis via Conv2d with two-sided padding, FSMN-style, and adds
    the result back to the input. ``stride`` and ``padding`` are accepted for interface parity
    with DepthwiseConv1d but are not used; ``kernel_size`` sets the memory order.

    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel (memory order)
        stride (int, optional): Stride of the convolution. Default: 1 (unused)
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0 (unused)
        bias (bool, optional): If True, adds a learnable bias to the output. Default: False

    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing input vector

    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produced by the depthwise convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = False,
    ) -> None:
        super(DepthwiseConv2d, self).__init__()
        assert out_channels % in_channels == 0, "out_channels should be a constant multiple of in_channels"
        self.lorder = kernel_size
        self.rorder = self.lorder
        # Kernel spans lorder-1 past frames, the current frame, and rorder-1 future frames.
        self.conv = nn.Conv2d(
            in_channels, out_channels, [self.lorder + self.rorder - 1, 1], [1, 1],
            groups=in_channels, bias=False,
        )

    def forward(self, inputs: Tensor) -> Tensor:
        # inputs: batch x feature x sequence
        x = torch.unsqueeze(inputs, -1)
        # x: batch x feature x sequence x 1; pad the sequence axis on both
        # sides so the convolution preserves the sequence length.
        y = F.pad(x, [0, 0, self.lorder - 1, self.rorder - 1])
        # The residual addition assumes out_channels == in_channels.
        out = x + self.conv(y)
        return out.squeeze(-1)
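
# --- Illustrative usage (assumed example, not from the original file) ---
# The two-sided padding keeps the sequence length unchanged; dims below are
# arbitrary, with out_channels == in_channels as the residual path requires.
#
# >>> dw2d = DepthwiseConv2d(in_channels=64, out_channels=64, kernel_size=5)
# >>> dw2d(torch.randn(2, 64, 100)).shape
# torch.Size([2, 64, 100])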
class PointwiseConv1d(nn.Module):
    """
    When kernel size == 1, a conv1d is termed in the literature as pointwise convolution.
    This operation is often used to match dimensions.

    Args:
        in_channels (int): Number of channels in the input
        out_channels (int): Number of channels produced by the convolution
        stride (int, optional): Stride of the convolution. Default: 1
        padding (int or tuple, optional): Zero-padding added to both sides of the input. Default: 0
        bias (bool, optional): If True, adds a learnable bias to the output. Default: True

    Inputs: inputs
        - **inputs** (batch, in_channels, time): Tensor containing input vector

    Returns: outputs
        - **outputs** (batch, out_channels, time): Tensor produced by pointwise 1-D convolution.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        padding: int = 0,
        bias: bool = True,
    ) -> None:
        super(PointwiseConv1d, self).__init__()
        self.conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=stride,
            padding=padding,
            bias=bias,
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return self.conv(inputs)
class ConvModule(nn.Module):
    """
    Modified from the Conformer convolution module: reduced to a single depthwise
    convolution with a residual connection. ``expansion_factor`` and ``dropout_p``
    are kept for interface parity but are not used here.

    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 31
        dropout_p (float, optional): probability of dropout

    Inputs: inputs
        inputs (batch, time, dim): Tensor containing input sequences

    Outputs: outputs
        outputs (batch, time, dim): Tensor produced by the convolution module.
    """

    def __init__(
        self,
        in_channels: int,
        kernel_size: int = 31,
        expansion_factor: int = 2,
        dropout_p: float = 0.1,
    ) -> None:
        super(ConvModule, self).__init__()
        assert (kernel_size - 1) % 2 == 0, "kernel_size should be an odd number for 'SAME' padding"
        assert expansion_factor == 2, "Currently, only supports expansion_factor 2"
        self.sequential = nn.Sequential(
            Transpose(shape=(1, 2)),
            DepthwiseConv1d(in_channels, in_channels, kernel_size, stride=1, padding=(kernel_size - 1) // 2),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs + self.sequential(inputs).transpose(1, 2)
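
# --- Illustrative usage (assumed example, not from the original file) ---
# Residual depthwise convolution over time; input layout is (batch, time, dim).
#
# >>> conv = ConvModule(in_channels=64, kernel_size=31)
# >>> conv(torch.randn(2, 100, 64)).shape
# torch.Size([2, 100, 64])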
class ConvModule_Gating(nn.Module):
    """
    Modified from the Conformer convolution module: a single depthwise convolution
    whose output multiplicatively gates the input. ``expansion_factor`` and
    ``dropout_p`` are kept for interface parity but are not used here.

    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 21
        dropout_p (float, optional): probability of dropout

    Inputs: inputs
        inputs (batch, time, dim): Tensor containing input sequences

    Outputs: outputs
        outputs (batch, time, dim): Tensor produced by the gating convolution module.
    """

    def __init__(
        self,
        in_channels: int,
        kernel_size: int = 21,  # must be odd to satisfy the 'SAME' padding assert below
        expansion_factor: int = 2,
        dropout_p: float = 0.1,
    ) -> None:
        super(ConvModule_Gating, self).__init__()
        assert (kernel_size - 1) % 2 == 0, "kernel_size should be an odd number for 'SAME' padding"
        assert expansion_factor == 2, "Currently, only supports expansion_factor 2"
        self.sequential = nn.Sequential(
            Transpose(shape=(1, 2)),
            DepthwiseConv1d(in_channels, in_channels, kernel_size, stride=1, padding=(kernel_size - 1) // 2),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs * self.sequential(inputs).transpose(1, 2)
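
# --- Illustrative usage (assumed example, not from the original file) ---
# Same layout as ConvModule, but the convolution output gates the input
# multiplicatively instead of being added as a residual.
#
# >>> gate = ConvModule_Gating(in_channels=64)
# >>> gate(torch.randn(2, 100, 64)).shape
# torch.Size([2, 100, 64])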
class Conformer_ConvModule(nn.Module):
    """
    The Conformer convolution module starts with a pointwise convolution and a gated linear
    unit (GLU). This is followed by a single 1-D depthwise convolution layer. Batchnorm is
    deployed just after the convolution to aid training of deep models.

    Args:
        in_channels (int): Number of channels in the input
        kernel_size (int or tuple, optional): Size of the convolving kernel. Default: 21
        dropout_p (float, optional): probability of dropout

    Inputs: inputs
        inputs (batch, dim, time): Tensor containing input sequences

    Outputs: outputs
        outputs (batch, dim, time): Tensor produced by the conformer convolution module.
    """

    def __init__(
        self,
        in_channels: int,
        kernel_size: int = 21,
        expansion_factor: int = 2,
        dropout_p: float = 0.1,
    ) -> None:
        super(Conformer_ConvModule, self).__init__()
        assert (kernel_size - 1) % 2 == 0, "kernel_size should be an odd number for 'SAME' padding"
        assert expansion_factor == 2, "Currently, only supports expansion_factor 2"
        self.sequential = nn.Sequential(
            select_norm('ln', in_channels, 3),
            PointwiseConv1d(in_channels, in_channels * expansion_factor, stride=1, padding=0, bias=True),
            GLU(dim=1),
            DepthwiseConv1d(in_channels, in_channels, kernel_size, stride=1, padding=(kernel_size - 1) // 2),
            select_norm('bn', in_channels, 3),  # falls through to nn.BatchNorm1d
            Swish(),
            PointwiseConv1d(in_channels, in_channels, stride=1, padding=0, bias=True),
            nn.Dropout(p=dropout_p),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return inputs + self.sequential(inputs)
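
# --- Illustrative usage (assumed example, not from the original file) ---
# Unlike ConvModule, this module expects channel-first input: (batch, dim, time).
# The pointwise expansion to 2*dim is halved back by GLU(dim=1).
#
# >>> block = Conformer_ConvModule(in_channels=64)
# >>> block(torch.randn(2, 64, 100)).shape
# torch.Size([2, 64, 100])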
class FeedForwardModule(nn.Module):
    """
    The Conformer feed-forward module follows pre-norm residual units: layer normalization is
    applied within the residual unit, on the input before the first linear layer. The module
    also applies Swish activation and dropout, which helps regularize the network. Note that
    the residual connection itself is left to the caller.

    Args:
        encoder_dim (int): Dimension of the conformer encoder
        expansion_factor (int): Expansion factor of the feed-forward module.
        dropout_p (float): Ratio of dropout

    Inputs: inputs
        - **inputs** (batch, time, dim): Tensor containing input sequences

    Outputs: outputs
        - **outputs** (batch, time, dim): Tensor produced by the feed-forward module.
    """

    def __init__(
        self,
        encoder_dim: int = 512,
        expansion_factor: int = 4,
        dropout_p: float = 0.1,
    ) -> None:
        super(FeedForwardModule, self).__init__()
        self.sequential = nn.Sequential(
            nn.LayerNorm(encoder_dim),
            Linear(encoder_dim, encoder_dim * expansion_factor, bias=True),
            Swish(),
            nn.Dropout(p=dropout_p),
            Linear(encoder_dim * expansion_factor, encoder_dim, bias=True),
            nn.Dropout(p=dropout_p),
        )

    def forward(self, inputs: Tensor) -> Tensor:
        return self.sequential(inputs)
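

# --- Illustrative usage (assumed example, not from the original file) ---
# In Conformer-style blocks the feed-forward module is typically combined with
# a half-step residual applied by the caller, e.g. x + 0.5 * ffn(x). A minimal
# runnable smoke test of a few modules defined above, with arbitrary sizes:
if __name__ == "__main__":
    x = torch.randn(2, 100, 64)                    # (batch, time, dim)
    ffn = FeedForwardModule(encoder_dim=64)
    y = x + 0.5 * ffn(x)                           # caller-side half-step residual
    fsmn = UniDeepFsmn(64, 64, lorder=5, hidden_size=128)
    z = fsmn(y)
    print(y.shape, z.shape)                        # torch.Size([2, 100, 64]) twice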