Spaces:
Sleeping
Sleeping
Hugo Flores Garcia
committed on
Commit
·
c068a29
1
Parent(s):
b3caf82
maestro script
Browse files
scripts/utils/maestro-reorg.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
|
5 |
+
# Default locations: the extracted MAESTRO release and the directory that
# receives the per-split symlink tree.
maestro_path = Path("/media/CHONK/hugo/maestro-v3.0.0")
output_path = Path("/media/CHONK/hugo/maestro-v3.0.0-split")

# Split names accepted in the metadata, in the order they are linked.
_SPLITS = ("train", "validation", "test")


def _load_splits(metadata_file: Path) -> dict:
    """Group audio filenames from the metadata JSON by their split name.

    The JSON is read as two parallel mappings, ``"split"`` and
    ``"audio_filename"``, which share the same keys.

    Args:
        metadata_file: Path to the metadata JSON file.

    Returns:
        A dict with one list of audio filenames per entry of ``_SPLITS``.

    Raises:
        ValueError: If an entry carries a split name not in ``_SPLITS``.
    """
    with open(metadata_file, encoding="utf-8") as f:
        maestro = json.load(f)

    splits = {name: [] for name in _SPLITS}
    for key, split in maestro["split"].items():
        audio_filename = maestro["audio_filename"][key]
        if split not in splits:
            raise ValueError(f"Unknown split {split}")
        splits[split].append(audio_filename)
    return splits


def _symlink_split(source_root: Path, dest_root: Path, split: str, audio_filenames: list) -> None:
    """Create ``dest_root/<split>/<audio_filename>`` symlinks into ``source_root``."""
    for audio_filename in audio_filenames:
        link = dest_root / split / audio_filename
        link.parent.mkdir(parents=True, exist_ok=True)
        # A link left over from a previous run is replaced so the script can
        # be re-run; a regular file at the same path is NOT removed and still
        # makes os.symlink raise FileExistsError.
        if link.is_symlink():
            link.unlink()
        os.symlink(source_root / audio_filename, link)


def main(source_root: Path = maestro_path, dest_root: Path = output_path) -> None:
    """Mirror the dataset into per-split directories of symlinks.

    Args:
        source_root: Directory holding ``maestro-v3.0.0.json`` and the audio
            files it references.
        dest_root: Directory under which ``train/``, ``validation/`` and
            ``test/`` trees are created.

    Raises:
        ValueError: If the metadata contains an unknown split name.
    """
    # All metadata is read and validated before any link is created.
    splits = _load_splits(source_root / "maestro-v3.0.0.json")

    # symlink all files
    for split in _SPLITS:
        _symlink_split(source_root, dest_root, split, splits[split])


if __name__ == "__main__":
    main()
|