jeduardogruiz
committed on
Create transformers.py
Browse files- transformers.py +120 -0
transformers.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
|
7 |
+
"""A streamable transformer."""
|
8 |
+
|
9 |
+
import typing as tp
|
10 |
+
|
11 |
+
import torch
|
12 |
+
import torch.nn as nn
|
13 |
+
import torch.nn.functional as F
|
14 |
+
|
15 |
+
# this module worker to DRC and SPOTIFY
|
16 |
+
|
17 |
+
def create_sin_embedding(positions: torch.Tensor, dim: int, max_period: float = 10000):
    """Create a sinusoidal time embedding for `positions` with target dimension `dim`.

    Args:
        positions (torch.Tensor): positions to embed. They are broadcast against a
            `[1, 1, dim // 2]` frequency axis, so a trailing dimension of size 1
            (e.g. `[1, T, 1]` or `[B, T, 1]`) is expected.
        dim (int): size of the embedding on the last dimension, must be even.
        max_period (float): largest period used by the sinusoids.

    Returns:
        torch.Tensor: the cosines followed by the sines, concatenated on the last
        dimension (size `dim`).
    """
    # We aim for the [B, T, C] (batch, time, channels) layout.
    assert dim % 2 == 0
    half_dim = dim // 2
    adim = torch.arange(half_dim, device=positions.device).view(1, 1, -1)
    # With dim == 2 there is a single frequency and `half_dim - 1` is zero; dividing
    # by it would give 0 / 0 = NaN, so clamp the divisor to 1 (exponent is then 0).
    denom = max(half_dim - 1, 1)
    phase = positions / (max_period ** (adim / denom))
    return torch.cat([
        torch.cos(phase),
        torch.sin(phase),
    ], dim=-1)
|
29 |
+
|
30 |
+
|
31 |
+
class StreamingTransformerEncoderLayer(nn.TransformerEncoderLayer):
    """Transformer encoder layer whose self-attention also attends to cached past frames.

    The feed-forward block, norms and dropouts are inherited from
    `nn.TransformerEncoderLayer`; only the self-attention path is changed so that
    the keys/values are the concatenation of `x_past` and the current input.
    """

    def forward(self, x: torch.Tensor, x_past: torch.Tensor, past_context: int):  # type: ignore
        """Run the layer on `x` using `x_past` as extra attention context.

        Args:
            x (torch.Tensor): current frames, 3 dimensional with time on dim 1.
            x_past (torch.Tensor): cached attention inputs from previous calls,
                concatenated before `x` along dim 1.
            past_context (int): how many steps back each query may look.

        Returns:
            tuple: `(output, sa_input)` where `sa_input` is the tensor that was fed
            to self-attention (normalized when `norm_first` is set, the raw input
            otherwise).
        """
        if self.norm_first:
            sa_input = self.norm1(x)
            x = x + self._sa_block(sa_input, x_past, past_context)
            x = x + self._ff_block(self.norm2(x))
        else:
            sa_input = x
            x = self.norm1(x + self._sa_block(sa_input, x_past, past_context))
            x = self.norm2(x + self._ff_block(x))

        return x, sa_input

    # self-attention block
    def _sa_block(self, x: torch.Tensor, x_past: torch.Tensor, past_context: int):  # type: ignore
        """Causal self-attention of `x` over `[x_past, x]`, limited to `past_context` steps back."""
        _, T, _ = x.shape
        _, H, _ = x_past.shape

        queries = x
        keys = torch.cat([x_past, x], dim=1)
        values = keys

        # Queries sit after the H cached frames, keys span cached + current frames.
        queries_pos = torch.arange(H, T + H, device=x.device).view(-1, 1)
        keys_pos = torch.arange(T + H, device=x.device).view(1, -1)
        delta = queries_pos - keys_pos
        # A key is visible when it is not in the future (delta >= 0) and not further
        # back than `past_context` steps.
        valid_access = (delta >= 0) & (delta <= past_context)
        # The mask is inverted because the attention module is given the positions
        # that must NOT be attended to.
        x = self.self_attn(queries, keys, values,
                           attn_mask=~valid_access,
                           need_weights=False)[0]
        return self.dropout1(x)
|
61 |
+
|
62 |
+
|
63 |
+
class StreamingTransformerEncoder(nn.Module):
    """TransformerEncoder with streaming support.

    Args:
        dim (int): dimension of the data.
        hidden_scale (float): intermediate dimension of FF module is this times the dimension.
        num_heads (int): number of heads.
        num_layers (int): number of layers.
        max_period (float): maximum period of cosines in the positional embedding.
        past_context (int or None): receptive field for the causal mask, infinite if None.
        gelu (bool): if true uses GeLUs, otherwise use ReLUs.
        norm_in (bool): normalize the input.
        dropout (float): dropout probability.
        **kwargs: See `nn.TransformerEncoderLayer`.
    """
    def __init__(self, dim, hidden_scale: float = 4., num_heads: int = 8, num_layers: int = 5,
                 max_period: float = 10000, past_context: tp.Optional[int] = 1000, gelu: bool = True,
                 norm_in: bool = True, dropout: float = 0., **kwargs):
        super().__init__()
        assert dim % num_heads == 0
        hidden_dim = int(dim * hidden_scale)

        self.max_period = max_period
        self.past_context = past_context
        activation: tp.Any = F.gelu if gelu else F.relu

        self.norm_in: nn.Module
        if norm_in:
            self.norm_in = nn.LayerNorm(dim)
        else:
            self.norm_in = nn.Identity()

        self.layers = nn.ModuleList()
        for _ in range(num_layers):
            self.layers.append(
                StreamingTransformerEncoderLayer(
                    dim, num_heads, hidden_dim,
                    activation=activation, batch_first=True, dropout=dropout, **kwargs))

    def forward(self, x: torch.Tensor,
                states: tp.Optional[tp.List[torch.Tensor]] = None,
                offset: tp.Union[int, torch.Tensor] = 0):
        """Encode a chunk of frames, carrying attention context across calls.

        Args:
            x (torch.Tensor): input of shape `[B, T, C]`.
            states (list of torch.Tensor or None): one cached tensor per layer, as
                returned by a previous call. When None, each layer starts from a
                single all-zero frame.
            offset (int or torch.Tensor): position of the first frame of `x`, added
                to the positions used for the positional embedding.

        Returns:
            tuple: `(output, new_states, new_offset)` where `new_states` holds one
            tensor per layer and `new_offset` is `offset + T`.
        """
        _, T, C = x.shape
        if states is None:
            # One state per layer; each starts as a single zero frame.
            states = [torch.zeros_like(x[:, :1]) for _ in self.layers]

        positions = torch.arange(T, device=x.device).view(1, -1, 1) + offset
        pos_emb = create_sin_embedding(positions, C, max_period=self.max_period)

        new_state: tp.List[torch.Tensor] = []
        x = self.norm_in(x)
        x = x + pos_emb

        for layer_state, layer in zip(states, self.layers):
            if self.past_context is None:
                # Infinite receptive field: use a window wide enough to cover every
                # cached frame plus the current chunk, so nothing is masked or dropped.
                context = layer_state.shape[1] + T
            else:
                context = self.past_context
            x, new_layer_state = layer(x, layer_state, context)
            new_layer_state = torch.cat([layer_state, new_layer_state], dim=1)
            # Only keep the most recent `context` frames for the next call.
            new_state.append(new_layer_state[:, -context:, :])
        return x, new_state, offset + T
|