Spaces:
Runtime error
Runtime error
Upload vocoder/audio.py with huggingface_hub
Browse files- vocoder/audio.py +108 -0
vocoder/audio.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import numpy as np
|
3 |
+
import librosa
|
4 |
+
import vocoder.hparams as hp
|
5 |
+
from scipy.signal import lfilter
|
6 |
+
import soundfile as sf
|
7 |
+
|
8 |
+
|
9 |
+
def label_2_float(x, bits):
    """Rescale label values from [0, 2**bits - 1] to floats in [-1, 1].

    Args:
        x: Label value(s); a scalar or anything supporting arithmetic
            broadcasting (e.g. a NumPy array).
        bits: Bit depth of the labels; the largest label is 2**bits - 1.

    Returns:
        ``x`` mapped linearly so that 0 -> -1.0 and 2**bits - 1 -> 1.0.
    """
    top_label = 2**bits - 1.
    return 2 * x / top_label - 1.
|
11 |
+
|
12 |
+
|
13 |
+
def float_2_label(x, bits):
    """Rescale floats in [-1, 1] to label values in [0, 2**bits - 1].

    The result is not rounded: it stays floating point, merely rescaled and
    clipped to the label range.

    Args:
        x: Array-like object exposing ``max()`` and ``clip()`` (e.g. a NumPy
            array) whose values lie in [-1, 1].
        bits: Bit depth of the labels; the largest label is 2**bits - 1.

    Returns:
        ``x`` mapped linearly so that -1.0 -> 0 and 1.0 -> 2**bits - 1.

    Raises:
        AssertionError: If the largest absolute value of ``x`` is not
            <= 1.0 (this includes NaN input).
    """
    # Raised explicitly instead of via `assert` so the range check is not
    # stripped under `python -O`; the exception type is kept unchanged for
    # existing callers. The `not (... <= ...)` form also rejects NaN.
    if not abs(x).max() <= 1.0:
        raise AssertionError("float_2_label expects values in [-1, 1]")
    top_label = 2**bits - 1
    scaled = (x + 1.) * top_label / 2
    return scaled.clip(0, top_label)
|
17 |
+
|
18 |
+
|
19 |
+
def load_wav(path):
    """Load an audio file with librosa at the project sample rate.

    Args:
        path: Location of the audio file; converted with ``str()`` before
            being handed to librosa.

    Returns:
        The first element of ``librosa.load``'s result (the audio samples);
        the sample rate it also returns is discarded.
    """
    samples, _ = librosa.load(str(path), sr=hp.sample_rate)
    return samples
|
21 |
+
|
22 |
+
|
23 |
+
def save_wav(x, path):
    """Write samples to ``path`` with soundfile at the project sample rate.

    Args:
        x: Array of samples; cast to float32 before writing.
        path: Destination passed straight to ``soundfile.write``.
    """
    samples = x.astype(np.float32)
    sf.write(path, samples, hp.sample_rate)
|
25 |
+
|
26 |
+
|
27 |
+
def split_signal(x):
    """Split signed 16-bit sample values into coarse and fine 8-bit parts.

    The input is shifted by 2**15 to make it non-negative, then divided
    into its high byte (coarse) and low byte (fine).

    Args:
        x: Sample value(s) in the signed 16-bit range; an int or an
            integer array.

    Returns:
        A ``(coarse, fine)`` tuple where ``coarse = (x + 2**15) // 256``
        and ``fine = (x + 2**15) % 256``.
    """
    # An int16 (or narrower) NumPy value cannot represent the +2**15 offset:
    # under NumPy 2 promotion rules the addition raises OverflowError instead
    # of upcasting. Widen such inputs to int32 first, which is also the dtype
    # older NumPy versions produced for int16 input.
    if (
        isinstance(x, (np.ndarray, np.generic))
        and x.dtype.kind in "iu"
        and x.dtype.itemsize < 4
    ):
        x = x.astype(np.int32)
    unsigned = x + 2**15
    return unsigned // 256, unsigned % 256
|
32 |
+
|
33 |
+
|
34 |
+
def combine_signal(coarse, fine):
    """Rebuild a signed sample value from its coarse and fine parts.

    Args:
        coarse: High part of the unsigned value (weighted by 256).
        fine: Low part of the unsigned value.

    Returns:
        ``coarse * 256 + fine`` shifted down by 2**15.
    """
    unsigned = coarse * 256 + fine
    return unsigned - 2**15
|
36 |
+
|
37 |
+
|
38 |
+
def encode_16bits(x):
    """Convert float samples to int16 by scaling with 2**15 and clipping.

    Args:
        x: Sample value(s); 1.0 corresponds to full scale.

    Returns:
        ``x * 2**15`` clipped to [-2**15, 2**15 - 1] and cast to
        ``np.int16``.
    """
    full_scale = 2**15
    scaled = x * full_scale
    bounded = np.clip(scaled, -full_scale, full_scale - 1)
    return bounded.astype(np.int16)
|
40 |
+
|
41 |
+
|
42 |
+
# Module-level cache for the mel filterbank; filled in on the first call to
# linear_to_mel() and reused afterwards.
mel_basis = None


def linear_to_mel(spectrogram):
    """Project a linear-frequency spectrogram through the mel filterbank.

    The filterbank is created with ``build_mel_basis()`` the first time this
    function runs and stored in the module-level ``mel_basis``.

    Args:
        spectrogram: Array to be left-multiplied by the filterbank.

    Returns:
        ``np.dot(mel_basis, spectrogram)``.
    """
    global mel_basis
    basis = mel_basis
    if basis is None:
        basis = mel_basis = build_mel_basis()
    return np.dot(basis, spectrogram)
|
50 |
+
|
51 |
+
|
52 |
+
def build_mel_basis():
    """Create the mel filterbank from the project hyper-parameters.

    Returns:
        The matrix produced by ``librosa.filters.mel`` for
        ``hp.sample_rate``, ``hp.n_fft``, ``hp.num_mels`` and ``hp.fmin``.
    """
    # `sr` and `n_fft` are passed by keyword: recent librosa releases accept
    # them as keyword-only arguments, and the keyword form also works on
    # older releases that allowed positional use.
    return librosa.filters.mel(
        sr=hp.sample_rate,
        n_fft=hp.n_fft,
        n_mels=hp.num_mels,
        fmin=hp.fmin,
    )
|
54 |
+
|
55 |
+
|
56 |
+
def normalize(S):
    """Rescale a dB-valued array to [0, 1] relative to ``hp.min_level_db``.

    Args:
        S: Array of levels in dB.

    Returns:
        ``(S - hp.min_level_db) / -hp.min_level_db`` clipped to [0, 1].
    """
    floor_db = hp.min_level_db
    scaled = (S - floor_db) / -floor_db
    return np.clip(scaled, 0, 1)
|
58 |
+
|
59 |
+
|
60 |
+
def denormalize(S):
    """Undo ``normalize``: map values in [0, 1] back to a dB scale.

    Args:
        S: Array of normalized values; clipped to [0, 1] before rescaling.

    Returns:
        ``clip(S, 0, 1) * -hp.min_level_db + hp.min_level_db``.
    """
    floor_db = hp.min_level_db
    bounded = np.clip(S, 0, 1)
    return (bounded * -floor_db) + floor_db
|
62 |
+
|
63 |
+
|
64 |
+
def amp_to_db(x):
    """Convert amplitudes to decibels as ``20 * log10(x)``.

    Values below 1e-5 are raised to 1e-5 first so the logarithm stays finite.

    Args:
        x: Amplitude value(s).

    Returns:
        The level(s) in dB.
    """
    floored = np.maximum(1e-5, x)
    return 20 * np.log10(floored)
|
66 |
+
|
67 |
+
|
68 |
+
def db_to_amp(x):
    """Convert decibel levels to amplitudes as ``10 ** (x * 0.05)``.

    Args:
        x: Level(s) in dB.

    Returns:
        The corresponding amplitude(s).
    """
    exponent = x * 0.05
    return np.power(10.0, exponent)
|
70 |
+
|
71 |
+
|
72 |
+
def spectrogram(y):
    """Compute a normalized dB magnitude spectrogram of ``y``.

    Steps: STFT -> magnitude -> dB -> subtract ``hp.ref_level_db`` ->
    ``normalize``.

    Args:
        y: Audio signal handed to ``stft``.

    Returns:
        The result of ``normalize`` applied to the referenced dB magnitudes.
    """
    magnitudes = np.abs(stft(y))
    level_db = amp_to_db(magnitudes) - hp.ref_level_db
    return normalize(level_db)
|
76 |
+
|
77 |
+
|
78 |
+
def melspectrogram(y):
    """Compute a normalized dB mel spectrogram of ``y``.

    Steps: STFT -> magnitude -> ``linear_to_mel`` -> dB -> ``normalize``.
    Unlike ``spectrogram``, no reference level is subtracted here.

    Args:
        y: Audio signal handed to ``stft``.

    Returns:
        The result of ``normalize`` applied to the dB mel magnitudes.
    """
    magnitudes = np.abs(stft(y))
    mel_db = amp_to_db(linear_to_mel(magnitudes))
    return normalize(mel_db)
|
82 |
+
|
83 |
+
|
84 |
+
def stft(y):
    """Run ``librosa.stft`` on ``y`` with the project hyper-parameters.

    Args:
        y: Audio signal.

    Returns:
        Whatever ``librosa.stft`` returns for ``hp.n_fft``,
        ``hp.hop_length`` and ``hp.win_length``.
    """
    return librosa.stft(
        y=y,
        n_fft=hp.n_fft,
        hop_length=hp.hop_length,
        win_length=hp.win_length,
    )
|
86 |
+
|
87 |
+
|
88 |
+
def pre_emphasis(x):
    """Filter ``x`` with the FIR filter ``[1, -hp.preemphasis]``.

    Args:
        x: Signal passed to ``scipy.signal.lfilter``.

    Returns:
        The filtered signal.
    """
    numerator = [1, -hp.preemphasis]
    denominator = [1]
    return lfilter(numerator, denominator, x)
|
90 |
+
|
91 |
+
|
92 |
+
def de_emphasis(x):
    """Filter ``x`` with the inverse of the ``pre_emphasis`` filter.

    The coefficients are those of ``pre_emphasis`` with numerator and
    denominator swapped.

    Args:
        x: Signal passed to ``scipy.signal.lfilter``.

    Returns:
        The filtered signal.
    """
    numerator = [1]
    denominator = [1, -hp.preemphasis]
    return lfilter(numerator, denominator, x)
|
94 |
+
|
95 |
+
|
96 |
+
def encode_mu_law(x, mu):
    """Compand ``x`` with the mu-law curve and quantize it to labels.

    Args:
        x: Sample value(s); the companding formula maps [-1, 1] to [-1, 1].
        mu: Number of quantization levels; ``mu - 1`` is used as the
            companding constant and as the largest label.

    Returns:
        Float-valued labels: the companded signal shifted to [0, mu - 1]
        and rounded via ``floor(v + 0.5)``.
    """
    top = mu - 1
    companded = np.sign(x) * np.log(1 + top * np.abs(x)) / np.log(1 + top)
    shifted = (companded + 1) / 2 * top + 0.5
    return np.floor(shifted)
|
100 |
+
|
101 |
+
|
102 |
+
def decode_mu_law(y, mu, from_labels=True):
    """Expand a mu-law companded signal back to linear amplitude.

    Args:
        y: Companded value(s). Treated as labels when ``from_labels`` is
            true, otherwise as floats already on the companded scale.
        mu: Number of quantization levels; ``mu - 1`` is used as the
            companding constant.
        from_labels: When true, ``y`` is first converted with
            ``label_2_float(y, log2(mu))``.

    Returns:
        ``sign(y) / (mu - 1) * (mu ** |y| - 1)`` evaluated on the float
        form of ``y``.
    """
    companded = label_2_float(y, math.log2(mu)) if from_labels else y
    top = mu - 1
    magnitude = (1 + top) ** np.abs(companded) - 1
    return np.sign(companded) / top * magnitude
|
108 |
+
|