Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
·
4c6c719
1
Parent(s):
fff28a2
pin numpy
Browse files
- README.md +0 -5
- demo.py → app.py +25 -57
- setup.py +1 -0
README.md
CHANGED
@@ -41,11 +41,6 @@ Download the pretrained models from [this link](https://zenodo.org/record/813654
|
|
41 |
|
42 |
# Usage
|
43 |
|
44 |
-
First, you'll want to set up your environment
|
45 |
-
```bash
|
46 |
-
source ./env/env.sh
|
47 |
-
```
|
48 |
-
|
49 |
## Launching the Gradio Interface
|
50 |
You can launch a gradio UI to play with vampnet.
|
51 |
|
|
|
41 |
|
42 |
# Usage
|
43 |
|
|
|
|
|
|
|
|
|
|
|
44 |
## Launching the Gradio Interface
|
45 |
You can launch a gradio UI to play with vampnet.
|
46 |
|
demo.py → app.py
RENAMED
@@ -32,15 +32,6 @@ dataset = at.data.datasets.AudioDataset(
|
|
32 |
)
|
33 |
|
34 |
|
35 |
-
checkpoints = {
|
36 |
-
"vampnet": {
|
37 |
-
"coarse": "./models/vampnet/coarse.pth",
|
38 |
-
"c2f": "./models/vampnet/c2f.pth",
|
39 |
-
"codec": "./models/vampnet/codec.pth",
|
40 |
-
"full_ckpt": True
|
41 |
-
},
|
42 |
-
}
|
43 |
-
interface.checkpoint_key = "vampnet"
|
44 |
|
45 |
|
46 |
OUT_DIR = Path("gradio-outputs")
|
@@ -74,23 +65,10 @@ def load_random_audio():
|
|
74 |
|
75 |
|
76 |
def _vamp(data, return_mask=False):
|
77 |
-
|
78 |
-
# if our checkpoint key is different, we need to load a new checkpoint
|
79 |
-
if data[checkpoint_key] != interface.checkpoint_key:
|
80 |
-
print(f"loading checkpoint {data[checkpoint_key]}")
|
81 |
-
interface.lora_load(
|
82 |
-
checkpoints[data[checkpoint_key]]["coarse"],
|
83 |
-
checkpoints[data[checkpoint_key]]["c2f"],
|
84 |
-
checkpoints[data[checkpoint_key]]["full_ckpt"],
|
85 |
-
)
|
86 |
-
interface.checkpoint_key = data[checkpoint_key]
|
87 |
-
|
88 |
out_dir = OUT_DIR / str(uuid.uuid4())
|
89 |
out_dir.mkdir()
|
90 |
sig = at.AudioSignal(data[input_audio])
|
91 |
|
92 |
-
# TODO: random pitch shift of segments in the signal to prompt! window size should be a parameter, pitch shift width should be a parameter
|
93 |
-
|
94 |
z = interface.encode(sig)
|
95 |
|
96 |
ncc = data[n_conditioning_codebooks]
|
@@ -211,10 +189,7 @@ with gr.Blocks() as demo:
|
|
211 |
|
212 |
with gr.Row():
|
213 |
with gr.Column():
|
214 |
-
|
215 |
-
label="use coarse2fine",
|
216 |
-
value=True
|
217 |
-
)
|
218 |
|
219 |
manual_audio_upload = gr.File(
|
220 |
label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
|
@@ -250,38 +225,17 @@ with gr.Blocks() as demo:
|
|
250 |
# mask settings
|
251 |
with gr.Column():
|
252 |
|
253 |
-
input_pitch_shift = gr.Slider(
|
254 |
-
label="input pitch shift (semitones)",
|
255 |
-
minimum=-36,
|
256 |
-
maximum=36,
|
257 |
-
step=1,
|
258 |
-
value=0,
|
259 |
-
)
|
260 |
-
|
261 |
-
rand_mask_intensity = gr.Slider(
|
262 |
-
label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
|
263 |
-
minimum=0.0,
|
264 |
-
maximum=1.0,
|
265 |
-
value=1.0
|
266 |
-
)
|
267 |
-
|
268 |
periodic_p = gr.Slider(
|
269 |
-
label="periodic prompt (0.0 means no
|
270 |
minimum=0,
|
271 |
maximum=128,
|
272 |
step=1,
|
273 |
value=3,
|
274 |
)
|
275 |
-
|
276 |
-
label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
|
277 |
-
minimum=1,
|
278 |
-
maximum=20,
|
279 |
-
step=1,
|
280 |
-
value=1,
|
281 |
-
)
|
282 |
|
283 |
onset_mask_width = gr.Slider(
|
284 |
-
label="onset mask width (
|
285 |
minimum=0,
|
286 |
maximum=20,
|
287 |
step=1,
|
@@ -301,6 +255,20 @@ with gr.Blocks() as demo:
|
|
301 |
|
302 |
|
303 |
with gr.Accordion("extras ", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
n_conditioning_codebooks = gr.Number(
|
305 |
label="number of conditioning codebooks. probably 0",
|
306 |
value=0,
|
@@ -337,6 +305,8 @@ with gr.Blocks() as demo:
|
|
337 |
value=0.8
|
338 |
)
|
339 |
|
|
|
|
|
340 |
with gr.Accordion("sampling settings", open=False):
|
341 |
typical_filtering = gr.Checkbox(
|
342 |
label="typical filtering ",
|
@@ -356,6 +326,11 @@ with gr.Blocks() as demo:
|
|
356 |
value=64
|
357 |
)
|
358 |
|
|
|
|
|
|
|
|
|
|
|
359 |
num_steps = gr.Slider(
|
360 |
label="number of steps (should normally be between 12 and 36)",
|
361 |
minimum=1,
|
@@ -375,11 +350,6 @@ with gr.Blocks() as demo:
|
|
375 |
|
376 |
# mask settings
|
377 |
with gr.Column():
|
378 |
-
checkpoint_key = gr.Radio(
|
379 |
-
label="checkpoint",
|
380 |
-
choices=list(checkpoints.keys()),
|
381 |
-
value="spotdl"
|
382 |
-
)
|
383 |
vamp_button = gr.Button("vamp!!!")
|
384 |
output_audio = gr.Audio(
|
385 |
label="output audio",
|
@@ -414,11 +384,9 @@ with gr.Blocks() as demo:
|
|
414 |
use_coarse2fine,
|
415 |
stretch_factor,
|
416 |
onset_mask_width,
|
417 |
-
input_pitch_shift,
|
418 |
typical_filtering,
|
419 |
typical_mass,
|
420 |
typical_min_tokens,
|
421 |
-
checkpoint_key,
|
422 |
beat_mask_width,
|
423 |
beat_mask_downbeats
|
424 |
}
|
|
|
32 |
)
|
33 |
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
|
37 |
OUT_DIR = Path("gradio-outputs")
|
|
|
65 |
|
66 |
|
67 |
def _vamp(data, return_mask=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
out_dir = OUT_DIR / str(uuid.uuid4())
|
69 |
out_dir.mkdir()
|
70 |
sig = at.AudioSignal(data[input_audio])
|
71 |
|
|
|
|
|
72 |
z = interface.encode(sig)
|
73 |
|
74 |
ncc = data[n_conditioning_codebooks]
|
|
|
189 |
|
190 |
with gr.Row():
|
191 |
with gr.Column():
|
192 |
+
|
|
|
|
|
|
|
193 |
|
194 |
manual_audio_upload = gr.File(
|
195 |
label=f"upload some audio (will be randomly trimmed to max of {interface.coarse.chunk_size_s:.2f}s)",
|
|
|
225 |
# mask settings
|
226 |
with gr.Column():
|
227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
periodic_p = gr.Slider(
|
229 |
+
label="periodic prompt (0.0 means no prompt, 2 - lots of hints, 8 - a couple of hints, 16 - occasional hint, 32 - very occasional hint, etc)",
|
230 |
minimum=0,
|
231 |
maximum=128,
|
232 |
step=1,
|
233 |
value=3,
|
234 |
)
|
235 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
onset_mask_width = gr.Slider(
|
238 |
+
label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
|
239 |
minimum=0,
|
240 |
maximum=20,
|
241 |
step=1,
|
|
|
255 |
|
256 |
|
257 |
with gr.Accordion("extras ", open=False):
|
258 |
+
rand_mask_intensity = gr.Slider(
|
259 |
+
label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
|
260 |
+
minimum=0.0,
|
261 |
+
maximum=1.0,
|
262 |
+
value=1.0
|
263 |
+
)
|
264 |
+
|
265 |
+
periodic_w = gr.Slider(
|
266 |
+
label="periodic prompt width (steps, 1 step ~= 10milliseconds)",
|
267 |
+
minimum=1,
|
268 |
+
maximum=20,
|
269 |
+
step=1,
|
270 |
+
value=1,
|
271 |
+
)
|
272 |
n_conditioning_codebooks = gr.Number(
|
273 |
label="number of conditioning codebooks. probably 0",
|
274 |
value=0,
|
|
|
305 |
value=0.8
|
306 |
)
|
307 |
|
308 |
+
|
309 |
+
|
310 |
with gr.Accordion("sampling settings", open=False):
|
311 |
typical_filtering = gr.Checkbox(
|
312 |
label="typical filtering ",
|
|
|
326 |
value=64
|
327 |
)
|
328 |
|
329 |
+
use_coarse2fine = gr.Checkbox(
|
330 |
+
label="use coarse2fine",
|
331 |
+
value=True
|
332 |
+
)
|
333 |
+
|
334 |
num_steps = gr.Slider(
|
335 |
label="number of steps (should normally be between 12 and 36)",
|
336 |
minimum=1,
|
|
|
350 |
|
351 |
# mask settings
|
352 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
353 |
vamp_button = gr.Button("vamp!!!")
|
354 |
output_audio = gr.Audio(
|
355 |
label="output audio",
|
|
|
384 |
use_coarse2fine,
|
385 |
stretch_factor,
|
386 |
onset_mask_width,
|
|
|
387 |
typical_filtering,
|
388 |
typical_mass,
|
389 |
typical_min_tokens,
|
|
|
390 |
beat_mask_width,
|
391 |
beat_mask_downbeats
|
392 |
}
|
setup.py
CHANGED
@@ -28,6 +28,7 @@ setup(
|
|
28 |
install_requires=[
|
29 |
"torch",
|
30 |
"argbind>=0.3.2",
|
|
|
31 |
# "audiotools @ git+https://github.com/descriptinc/audiotools.git@f35914b5b3c6f1bf589cd09481478d741538828e",
|
32 |
# "dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
|
33 |
"gradio",
|
|
|
28 |
install_requires=[
|
29 |
"torch",
|
30 |
"argbind>=0.3.2",
|
31 |
+
"numpy==1.22",
|
32 |
# "audiotools @ git+https://github.com/descriptinc/audiotools.git@f35914b5b3c6f1bf589cd09481478d741538828e",
|
33 |
# "dac @ git+https://github.com/descriptinc/descript-audio-codec.git",
|
34 |
"gradio",
|