Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
·
4a2dc41
1
Parent(s):
f1ccdc1
interface, cleanup imputation code
Browse files
- env/data.sh +1 -1
- requirements.txt +1 -1
- scripts/exp/train.py +22 -14
- vampnet/interface.py +7 -1
env/data.sh
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
export PATH_TO_DATA=~/data
|
2 |
|
3 |
if [[ $(hostname) == "oon17" ]]; then
|
4 |
-
export PATH_TO_DATA=/
|
5 |
fi
|
6 |
|
7 |
if [[ $(hostname) == "oon19" ]]; then
|
|
|
1 |
export PATH_TO_DATA=~/data
|
2 |
|
3 |
if [[ $(hostname) == "oon17" ]]; then
|
4 |
+
export PATH_TO_DATA=/data/
|
5 |
fi
|
6 |
|
7 |
if [[ $(hostname) == "oon19" ]]; then
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
argbind>=0.3.1
|
2 |
pytorch-ignite
|
3 |
rich
|
4 |
-
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@
|
5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
6 |
tqdm
|
7 |
tensorboard
|
|
|
1 |
argbind>=0.3.1
|
2 |
pytorch-ignite
|
3 |
rich
|
4 |
+
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
|
5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
6 |
tqdm
|
7 |
tensorboard
|
scripts/exp/train.py
CHANGED
@@ -547,30 +547,38 @@ def train(
|
|
547 |
|
548 |
def save_imputation(self, z: torch.Tensor):
|
549 |
# imputations
|
550 |
-
|
551 |
-
|
552 |
|
553 |
-
|
554 |
-
|
|
|
|
|
555 |
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
|
|
|
|
|
|
|
|
|
|
560 |
)
|
561 |
-
|
562 |
-
|
|
|
563 |
|
564 |
imputed = []
|
565 |
for i in range(len(z)):
|
566 |
imputed.append(
|
567 |
-
|
568 |
codec=codec,
|
569 |
time_steps=z.shape[-1],
|
570 |
start_tokens=z[i][None, ...],
|
571 |
-
mask=
|
572 |
-
)
|
573 |
-
)
|
574 |
imputed = AudioSignal.batch(imputed)
|
575 |
|
576 |
for i in range(len(val_idx)):
|
|
|
547 |
|
548 |
def save_imputation(self, z: torch.Tensor):
|
549 |
# imputations
|
550 |
+
_prefix_amt = prefix_amt
|
551 |
+
_suffix_amt = suffix_amt
|
552 |
|
553 |
+
if _prefix_amt == 0:
|
554 |
+
_prefix_amt = 0.25
|
555 |
+
if _suffix_amt == 0:
|
556 |
+
_suffix_amt = 0.25
|
557 |
|
558 |
+
n_prefix = int(z.shape[-1] * _prefix_amt)
|
559 |
+
n_suffix = int(z.shape[-1] * _suffix_amt)
|
560 |
+
downsample_factor = None
|
561 |
+
|
562 |
+
vn = accel.unwrap(model)
|
563 |
+
|
564 |
+
z_mask, mask = vn.add_noise(
|
565 |
+
z, r=0.0, n_prefix=n_prefix, n_suffix=n_suffix,
|
566 |
+
downsample_factor=downsample_factor
|
567 |
)
|
568 |
+
|
569 |
+
imputed_noisy = vn.to_signal(z_mask, codec)
|
570 |
+
imputed_true = vn.to_signal(z, codec)
|
571 |
|
572 |
imputed = []
|
573 |
for i in range(len(z)):
|
574 |
imputed.append(
|
575 |
+
vn.sample(
|
576 |
codec=codec,
|
577 |
time_steps=z.shape[-1],
|
578 |
start_tokens=z[i][None, ...],
|
579 |
+
mask=mask[i][None, ...],
|
580 |
+
)
|
581 |
+
)
|
582 |
imputed = AudioSignal.batch(imputed)
|
583 |
|
584 |
for i in range(len(val_idx)):
|
vampnet/interface.py
CHANGED
@@ -53,7 +53,13 @@ class Interface:
|
|
53 |
|
54 |
@torch.inference_mode()
|
55 |
def encode(self, signal: AudioSignal):
|
56 |
-
signal =
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
58 |
return z
|
59 |
|
|
|
53 |
|
54 |
@torch.inference_mode()
|
55 |
def encode(self, signal: AudioSignal):
|
56 |
+
signal = (
|
57 |
+
signal.clone().to(self.device)
|
58 |
+
.resample(self.codec.sample_rate)
|
59 |
+
.to_mono()
|
60 |
+
.normalize(-24)
|
61 |
+
.ensure_max_of_audio(1.0)
|
62 |
+
)
|
63 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
64 |
return z
|
65 |
|