Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
·
4a2dc41
1
Parent(s):
f1ccdc1
interface, cleanup imputation code
Browse files
- env/data.sh +1 -1
- requirements.txt +1 -1
- scripts/exp/train.py +22 -14
- vampnet/interface.py +7 -1
env/data.sh
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
export PATH_TO_DATA=~/data
|
2 |
|
3 |
if [[ $(hostname) == "oon17" ]]; then
|
4 |
-
export PATH_TO_DATA=/
|
5 |
fi
|
6 |
|
7 |
if [[ $(hostname) == "oon19" ]]; then
|
|
|
1 |
export PATH_TO_DATA=~/data
|
2 |
|
3 |
if [[ $(hostname) == "oon17" ]]; then
|
4 |
+
export PATH_TO_DATA=/data/
|
5 |
fi
|
6 |
|
7 |
if [[ $(hostname) == "oon19" ]]; then
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
argbind>=0.3.1
|
2 |
pytorch-ignite
|
3 |
rich
|
4 |
-
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@
|
5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
6 |
tqdm
|
7 |
tensorboard
|
|
|
1 |
argbind>=0.3.1
|
2 |
pytorch-ignite
|
3 |
rich
|
4 |
+
audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
|
5 |
lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
|
6 |
tqdm
|
7 |
tensorboard
|
scripts/exp/train.py
CHANGED
@@ -547,30 +547,38 @@ def train(
|
|
547 |
|
548 |
def save_imputation(self, z: torch.Tensor):
|
549 |
# imputations
|
550 |
-
|
551 |
-
|
552 |
|
553 |
-
|
554 |
-
|
|
|
|
|
555 |
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
|
|
|
|
|
|
|
|
|
|
560 |
)
|
561 |
-
|
562 |
-
|
|
|
563 |
|
564 |
imputed = []
|
565 |
for i in range(len(z)):
|
566 |
imputed.append(
|
567 |
-
|
568 |
codec=codec,
|
569 |
time_steps=z.shape[-1],
|
570 |
start_tokens=z[i][None, ...],
|
571 |
-
mask=
|
572 |
-
)
|
573 |
-
)
|
574 |
imputed = AudioSignal.batch(imputed)
|
575 |
|
576 |
for i in range(len(val_idx)):
|
|
|
547 |
|
548 |
def save_imputation(self, z: torch.Tensor):
|
549 |
# imputations
|
550 |
+
_prefix_amt = prefix_amt
|
551 |
+
_suffix_amt = suffix_amt
|
552 |
|
553 |
+
if _prefix_amt == 0:
|
554 |
+
_prefix_amt = 0.25
|
555 |
+
if _suffix_amt == 0:
|
556 |
+
_suffix_amt = 0.25
|
557 |
|
558 |
+
n_prefix = int(z.shape[-1] * _prefix_amt)
|
559 |
+
n_suffix = int(z.shape[-1] * _suffix_amt)
|
560 |
+
downsample_factor = None
|
561 |
+
|
562 |
+
vn = accel.unwrap(model)
|
563 |
+
|
564 |
+
z_mask, mask = vn.add_noise(
|
565 |
+
z, r=0.0, n_prefix=n_prefix, n_suffix=n_suffix,
|
566 |
+
downsample_factor=downsample_factor
|
567 |
)
|
568 |
+
|
569 |
+
imputed_noisy = vn.to_signal(z_mask, codec)
|
570 |
+
imputed_true = vn.to_signal(z, codec)
|
571 |
|
572 |
imputed = []
|
573 |
for i in range(len(z)):
|
574 |
imputed.append(
|
575 |
+
vn.sample(
|
576 |
codec=codec,
|
577 |
time_steps=z.shape[-1],
|
578 |
start_tokens=z[i][None, ...],
|
579 |
+
mask=mask[i][None, ...],
|
580 |
+
)
|
581 |
+
)
|
582 |
imputed = AudioSignal.batch(imputed)
|
583 |
|
584 |
for i in range(len(val_idx)):
|
vampnet/interface.py
CHANGED
@@ -53,7 +53,13 @@ class Interface:
|
|
53 |
|
54 |
@torch.inference_mode()
|
55 |
def encode(self, signal: AudioSignal):
|
56 |
-
signal =
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
58 |
return z
|
59 |
|
|
|
53 |
|
54 |
@torch.inference_mode()
|
55 |
def encode(self, signal: AudioSignal):
|
56 |
+
signal = (
|
57 |
+
signal.clone().to(self.device)
|
58 |
+
.resample(self.codec.sample_rate)
|
59 |
+
.to_mono()
|
60 |
+
.normalize(-24)
|
61 |
+
.ensure_max_of_audio(1.0)
|
62 |
+
)
|
63 |
z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
|
64 |
return z
|
65 |
|