keithhon committed on
Commit
e73f7cb
·
1 Parent(s): a868039

Upload vocoder/audio.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. vocoder/audio.py +108 -0
vocoder/audio.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import numpy as np
3
+ import librosa
4
+ import vocoder.hparams as hp
5
+ from scipy.signal import lfilter
6
+ import soundfile as sf
7
+
8
+
9
def label_2_float(x, bits):
    """Linearly rescale labels so that 0 maps to -1 and 2**bits - 1 maps to 1.

    Args:
        x: Label value(s); a scalar or anything supporting arithmetic
            broadcasting (e.g. a NumPy array).
        bits: Bit depth defining the top label ``2**bits - 1``.

    Returns:
        The rescaled value(s) as floating point.
    """
    top_label = 2**bits - 1.
    return 2 * x / top_label - 1.
11
+
12
+
13
def float_2_label(x, bits):
    """Linearly rescale values in [-1, 1] onto the label range [0, 2**bits - 1].

    The result is clipped to the label range but is neither rounded nor cast
    here, so callers needing integer labels must convert it themselves.

    Args:
        x: Array-like object exposing ``.max()`` and ``.clip()`` (e.g. a NumPy
            array) whose absolute values must not exceed 1.
        bits: Bit depth defining the top label ``2**bits - 1``.

    Returns:
        The rescaled, clipped values.

    Raises:
        AssertionError: If any element of ``x`` lies outside [-1, 1].
    """
    assert abs(x).max() <= 1.0
    top_label = 2**bits - 1
    scaled = (x + 1.) * top_label / 2
    return scaled.clip(0, top_label)
17
+
18
+
19
def load_wav(path):
    """Load the audio file at ``path`` with librosa at ``hp.sample_rate``.

    Args:
        path: Location of the audio file; converted with ``str()`` before
            being handed to librosa, so path-like objects are accepted.

    Returns:
        The first element of ``librosa.load``'s result (the waveform); the
        sample rate it also returns is discarded.
    """
    loaded = librosa.load(str(path), sr=hp.sample_rate)
    return loaded[0]
21
+
22
+
23
def save_wav(x, path):
    """Write ``x`` to ``path`` with soundfile at ``hp.sample_rate``.

    Args:
        x: Array of samples; it is cast to ``float32`` before writing.
        path: Destination passed straight to ``soundfile.write``.
    """
    samples = x.astype(np.float32)
    sf.write(path, samples, hp.sample_rate)
25
+
26
+
27
def split_signal(x):
    """Split sample value(s) into a coarse and a fine component.

    The input is offset by ``2**15`` and then decomposed base 256, so
    ``coarse * 256 + fine - 2**15`` reproduces ``x``.
    NOTE(review): the offset suggests signed 16-bit samples are expected;
    confirm against callers.

    Args:
        x: Sample value(s); scalar or array.

    Returns:
        Tuple ``(coarse, fine)``: floor-quotient and remainder of the offset
        value divided by 256.
    """
    shifted = x + 2**15
    return shifted // 256, shifted % 256
32
+
33
+
34
def combine_signal(coarse, fine):
    """Recombine coarse and fine components into a single sample value.

    Computes ``coarse * 256 + fine`` and removes the ``2**15`` offset.

    Args:
        coarse: Base-256 high component(s).
        fine: Base-256 low component(s).

    Returns:
        The recombined value(s).
    """
    unsigned = coarse * 256 + fine
    return unsigned - 2**15
36
+
37
+
38
def encode_16bits(x):
    """Scale ``x`` by ``2**15``, clip to the int16 range and cast to int16.

    Args:
        x: Sample value(s) to convert (e.g. a NumPy float array).

    Returns:
        ``np.int16`` data; values outside [-2**15, 2**15 - 1] after scaling
        are saturated by the clip rather than wrapping around.
    """
    scaled = x * 2**15
    saturated = np.clip(scaled, -2**15, 2**15 - 1)
    return saturated.astype(np.int16)
40
+
41
+
42
# Module-level cache for the mel filterbank; filled on the first call to
# linear_to_mel() and reused afterwards.
mel_basis = None


def linear_to_mel(spectrogram):
    """Project a spectrogram onto the mel filterbank.

    The filterbank comes from ``build_mel_basis()`` the first time this is
    called and is stored in the module-level ``mel_basis`` for later calls.

    Args:
        spectrogram: Array whose leading dimension is compatible with the
            filterbank's trailing dimension under ``np.dot``.

    Returns:
        ``np.dot(mel_basis, spectrogram)``.
    """
    global mel_basis
    if mel_basis is None:
        # Build lazily so importing this module stays cheap.
        mel_basis = build_mel_basis()
    filterbank = mel_basis
    return np.dot(filterbank, spectrogram)
50
+
51
+
52
def build_mel_basis():
    """Create the mel filterbank from the vocoder hyperparameters.

    All arguments are passed by keyword: recent librosa releases make every
    parameter of ``librosa.filters.mel`` keyword-only, so the positional
    ``(sample_rate, n_fft)`` form raises ``TypeError`` there. The keyword form
    is accepted by older releases as well.

    Returns:
        The filterbank returned by ``librosa.filters.mel`` for
        ``hp.sample_rate``, ``hp.n_fft``, ``hp.num_mels`` and ``hp.fmin``.
    """
    return librosa.filters.mel(
        sr=hp.sample_rate,
        n_fft=hp.n_fft,
        n_mels=hp.num_mels,
        fmin=hp.fmin,
    )
54
+
55
+
56
def normalize(S):
    """Rescale ``S`` relative to ``hp.min_level_db`` and clip to [0, 1].

    ``hp.min_level_db`` maps to 0 and a value of 0 maps to 1.
    NOTE(review): this presumes ``hp.min_level_db`` is negative; confirm in
    the hparams module.

    Args:
        S: Value(s) to rescale, e.g. the output of ``amp_to_db``.

    Returns:
        The rescaled values, clipped to [0, 1].
    """
    floor_db = hp.min_level_db
    scaled = (S - floor_db) / -floor_db
    return np.clip(scaled, 0, 1)
58
+
59
+
60
def denormalize(S):
    """Map values in [0, 1] back onto the range [hp.min_level_db, 0].

    The input is clipped to [0, 1] first, so out-of-range values saturate.

    Args:
        S: Value(s) to map back, e.g. the output of ``normalize``.

    Returns:
        ``clip(S, 0, 1) * -hp.min_level_db + hp.min_level_db``.
    """
    floor_db = hp.min_level_db
    unit = np.clip(S, 0, 1)
    return (unit * -floor_db) + floor_db
62
+
63
+
64
def amp_to_db(x):
    """Convert amplitude(s) to decibels as ``20 * log10(x)``.

    Inputs below ``1e-5`` are raised to ``1e-5`` first, which keeps the
    logarithm finite.

    Args:
        x: Amplitude value(s).

    Returns:
        The level(s) in dB.
    """
    floored = np.maximum(1e-5, x)
    return 20 * np.log10(floored)
66
+
67
+
68
def db_to_amp(x):
    """Convert decibel value(s) to amplitude as ``10 ** (x * 0.05)``.

    Args:
        x: Level(s) in dB.

    Returns:
        The corresponding amplitude(s).
    """
    exponent = x * 0.05
    return np.power(10.0, exponent)
70
+
71
+
72
def spectrogram(y):
    """Compute a normalized dB-scaled magnitude spectrogram of ``y``.

    Steps: STFT -> magnitude -> dB -> subtract ``hp.ref_level_db`` ->
    ``normalize``.

    Args:
        y: Input signal passed to ``stft``.

    Returns:
        The output of ``normalize`` for the referenced dB spectrogram.
    """
    magnitudes = np.abs(stft(y))
    level_db = amp_to_db(magnitudes) - hp.ref_level_db
    return normalize(level_db)
76
+
77
+
78
def melspectrogram(y):
    """Compute a normalized dB-scaled mel spectrogram of ``y``.

    Steps: STFT -> magnitude -> mel projection -> dB -> ``normalize``.
    Unlike ``spectrogram``, no reference level is subtracted here.

    Args:
        y: Input signal passed to ``stft``.

    Returns:
        The output of ``normalize`` for the dB mel spectrogram.
    """
    magnitudes = np.abs(stft(y))
    mel_db = amp_to_db(linear_to_mel(magnitudes))
    return normalize(mel_db)
82
+
83
+
84
def stft(y):
    """Run ``librosa.stft`` on ``y`` with the vocoder's framing settings.

    Args:
        y: Input signal.

    Returns:
        The result of ``librosa.stft`` using ``hp.n_fft``, ``hp.hop_length``
        and ``hp.win_length``.
    """
    framing = {
        "n_fft": hp.n_fft,
        "hop_length": hp.hop_length,
        "win_length": hp.win_length,
    }
    return librosa.stft(y=y, **framing)
86
+
87
+
88
def pre_emphasis(x):
    """Filter ``x`` with numerator ``[1, -hp.preemphasis]`` and denominator ``[1]``.

    Args:
        x: Input signal.

    Returns:
        The filtered signal from ``scipy.signal.lfilter``.
    """
    numerator = [1, -hp.preemphasis]
    denominator = [1]
    return lfilter(numerator, denominator, x)
90
+
91
+
92
def de_emphasis(x):
    """Filter ``x`` with numerator ``[1]`` and denominator ``[1, -hp.preemphasis]``.

    The coefficients are those of ``pre_emphasis`` with numerator and
    denominator swapped.

    Args:
        x: Input signal.

    Returns:
        The filtered signal from ``scipy.signal.lfilter``.
    """
    numerator = [1]
    denominator = [1, -hp.preemphasis]
    return lfilter(numerator, denominator, x)
94
+
95
+
96
def encode_mu_law(x, mu):
    """Compress ``x`` with the mu-law formula and quantize to labels.

    ``mu`` is the number of quantization levels; the formula itself uses
    ``mu - 1``. For ``x`` in [-1, 1] the result lies in [0, mu - 1].

    Args:
        x: Signal value(s).
        mu: Number of quantization levels (e.g. 256).

    Returns:
        Floating-point label value(s) produced by ``np.floor``; no integer
        cast is applied here.
    """
    top_label = mu - 1
    magnitude = np.log(1 + top_label * np.abs(x))
    compressed = np.sign(x) * magnitude / np.log(1 + top_label)
    # Adding 0.5 before the floor rounds to the nearest label.
    return np.floor((compressed + 1) / 2 * top_label + 0.5)
100
+
101
+
102
def decode_mu_law(y, mu, from_labels=True):
    """Expand mu-law compressed values back to the linear domain.

    Args:
        y: Compressed value(s). Treated as labels when ``from_labels`` is
            true, otherwise as floats already in the compressed domain.
        mu: Number of quantization levels (e.g. 256); the formula itself uses
            ``mu - 1``.
        from_labels: When true, ``y`` is first converted with
            ``label_2_float`` using ``log2(mu)`` bits.

    Returns:
        The expanded value(s).
    """
    if from_labels:
        bit_depth = math.log2(mu)
        y = label_2_float(y, bit_depth)
    top_label = mu - 1
    return np.sign(y) / top_label * ((1 + top_label) ** np.abs(y) - 1)
108
+