Initial commit
Browse files- .gitattributes +66 -35
- README.md +143 -0
- assets/1_1epoch.wav +3 -0
- assets/1_Base_stable-audio-open-1.0.wav +3 -0
- assets/2_1epoch.wav +3 -0
- assets/2_Base_stable-audio-open-1.0.wav +3 -0
- assets/3_1epoch.wav +3 -0
- assets/3_Base_stable-audio-open-1.0.wav +3 -0
- assets/4_1epoch.wav +3 -0
- assets/4_Base_stable-audio-open-1.0.wav +3 -0
- assets/music_3_illustration.jpg +0 -0
- assets/music_4_illustration.jpg +0 -0
- model.safetensors +3 -0
.gitattributes
CHANGED
@@ -1,35 +1,66 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
assets/image_0_0.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
assets/image_1_0.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
assets/image_10_0.png filter=lfs diff=lfs merge=lfs -text
|
39 |
+
assets/image_11_0.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
assets/image_12_0.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
assets/image_13_0.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
assets/image_14_0.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
assets/image_15_0.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
assets/image_16_0.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
assets/image_2_0.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
assets/image_3_0.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
assets/image_4_0.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
assets/image_5_0.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
assets/image_6_0.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
assets/image_7_0.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
assets/image_8_0.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
assets/image_9_0.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
assets/demo_cfg_3_00000001.wav filter=lfs diff=lfs merge=lfs -text
|
54 |
+
assets/demo_cfg_6_00000001.wav filter=lfs diff=lfs merge=lfs -text
|
55 |
+
assets/demo_cfg_9_00000001.wav filter=lfs diff=lfs merge=lfs -text
|
56 |
+
assets/1_1epoch.wav filter=lfs diff=lfs merge=lfs -text
|
57 |
+
assets/1_Base_stable-audio-open-1.0.wav.wav filter=lfs diff=lfs merge=lfs -text
|
58 |
+
assets/2_1epoch.wav filter=lfs diff=lfs merge=lfs -text
|
59 |
+
assets/2_Base_stable-audio-open-1.0.wav.wav filter=lfs diff=lfs merge=lfs -text
|
60 |
+
assets/3_1epoch.wav filter=lfs diff=lfs merge=lfs -text
|
61 |
+
assets/3_Base_stable-audio-open-1.0.wav filter=lfs diff=lfs merge=lfs -text
|
62 |
+
assets/4_1epoch.wav filter=lfs diff=lfs merge=lfs -text
|
63 |
+
assets/4_Base_stable-audio-open-1.0.wav.wav filter=lfs diff=lfs merge=lfs -text
|
64 |
+
assets/1_Base_stable-audio-open-1.0.wav filter=lfs diff=lfs merge=lfs -text
|
65 |
+
assets/2_Base_stable-audio-open-1.0.wav filter=lfs diff=lfs merge=lfs -text
|
66 |
+
assets/4_Base_stable-audio-open-1.0.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
library_name: stable-audio-tools
|
5 |
+
license: other
|
6 |
+
license_name: stable-audio-community
|
7 |
+
pipeline_tag: text-to-audio
|
8 |
+
tags:
|
9 |
+
- text-to-audio
|
10 |
+
inference: true
|
11 |
+
widget:
|
12 |
+
- src: ./assets/demo_cfg_3_00000001.wav
|
13 |
+
example_title: 'Unconditional (blank prompt)'
|
14 |
+
parameters:
|
15 |
+
negative_prompt: 'blurry, cropped, ugly'
|
16 |
+
- text: 'Chill soft wake up, slow down alt, night get lucky dance, relax music introspective 2017 2018 2019 2020 2021 2022, acoustic atmosphere uplifting dreams, dreamy indie pop, electric trap, percussion, higher reverb, really intensity melody, goodbye'
|
17 |
+
parameters:
|
18 |
+
negative_prompt: 'blurry, cropped, ugly'
|
19 |
+
output:
|
20 |
+
url: ./assets/music_3_illustration.jpg
|
21 |
+
- text: 'Chill hip-hop beat, chillhop, lofi pop, favorite music'
|
22 |
+
parameters:
|
23 |
+
negative_prompt: 'blurry, cropped, ugly'
|
24 |
+
output:
|
25 |
+
url: ./assets/music_4_illustration.jpg
|
26 |
+
---
|
27 |
+
|
28 |
+
<details>
|
29 |
+
<summary>Comparison Table</summary>
|
30 |
+
<table style="width:100%; border-collapse: collapse;">
|
31 |
+
<colgroup>
|
32 |
+
<col style="width: 25%;">
|
33 |
+
<col style="width: 37.5%;">
|
34 |
+
<col style="width: 37.5%;">
|
35 |
+
</colgroup>
|
36 |
+
<tr>
|
37 |
+
<th>Prompt</th>
|
38 |
+
<th>Base Model</th>
|
39 |
+
<th>Fine-Tuned</th>
|
40 |
+
</tr>
|
41 |
+
<tr>
|
42 |
+
<td style="font-size: smaller; padding: 0.5px; word-wrap: break-word;">
|
43 |
+
Feel-Good Vibes and Dramatic Atmosphere, alone hero, epic, get good yeah, better last night pop, follow follow, echoing, powerful vocal driving melancholic vocals dramatic Features rising tension, progressive electro house, far away, by Alan Walker, popular song tempo, girl, female synth, popular, titled: legend never die
|
44 |
+
</td>
|
45 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
46 |
+
<audio controls style="width: 100%;">
|
47 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/1_Base_stable-audio-open-1.0.wav" type="audio/wav">
|
48 |
+
</audio>
|
49 |
+
</td>
|
50 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
51 |
+
<audio controls style="width: 100%;">
|
52 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/1_1epoch.wav" type="audio/wav">
|
53 |
+
</audio>
|
54 |
+
</td>
|
55 |
+
</tr>
|
56 |
+
<tr>
|
57 |
+
<td style="font-size: smaller; padding: 0.5px; word-wrap: break-word;">
|
58 |
+
Beautiful music progressive electro slap mood, upbeat, heavy bass, melancholic, hopeful; drums, vocals, dynamic shifts, building intensity, run far away, repetitive, let let go, think of us, titled popular lyrics: Mirror's Edge, popular lyrics say: "still still alive"
|
59 |
+
</td>
|
60 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
61 |
+
<audio controls style="width: 100%;">
|
62 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/2_Base_stable-audio-open-1.0.wav" type="audio/wav">
|
63 |
+
</audio>
|
64 |
+
</td>
|
65 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
66 |
+
<audio controls style="width: 100%;">
|
67 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/2_1epoch.wav" type="audio/wav">
|
68 |
+
</audio>
|
69 |
+
</td>
|
70 |
+
</tr>
|
71 |
+
<tr>
|
72 |
+
<td style="font-size: smaller; padding: 0.5px; word-wrap: break-word;">
|
73 |
+
Chill soft wake up, slow down alt, night get lucky dance, relax music introspective 2017 2018 2019 2020 2021 2022, acoustic atmosphere uplifting dreams, dreamy indie pop, electric trap, percussion, higher reverb, really intensity melody, goodbye
|
74 |
+
</td>
|
75 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
76 |
+
<audio controls style="width: 100%;">
|
77 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/3_Base_stable-audio-open-1.0.wav" type="audio/wav">
|
78 |
+
</audio>
|
79 |
+
</td>
|
80 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
81 |
+
<audio controls style="width: 100%;">
|
82 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/3_1epoch.wav" type="audio/wav">
|
83 |
+
</audio>
|
84 |
+
</td>
|
85 |
+
</tr>
|
86 |
+
<tr>
|
87 |
+
<td style="font-size: smaller; padding: 0.5px; word-wrap: break-word;">
|
88 |
+
Chill hip-hop beat, chillhop, lofi pop, favorite music
|
89 |
+
</td>
|
90 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
91 |
+
<audio controls style="width: 100%;">
|
92 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/4_Base_stable-audio-open-1.0.wav" type="audio/wav">
|
93 |
+
</audio>
|
94 |
+
</td>
|
95 |
+
<td style="padding: 0.5px; vertical-align: middle; text-align: center;">
|
96 |
+
<audio controls style="width: 100%;">
|
97 |
+
<source src="https://huggingface.co/Nekochu/stable-audio-open-1.0-Music/resolve/main/assets/4_1epoch.wav" type="audio/wav">
|
98 |
+
</audio>
|
99 |
+
</td>
|
100 |
+
</tr>
|
101 |
+
</table>
|
102 |
+
|
103 |
+
<div>
|
104 |
+
<Gallery />
|
105 |
+
<div class="not-prose mb-2 flex flex-wrap items-start gap-4 sm:mr-6 sm:flex-row">
|
106 |
+
<audio controls style="width: calc(50% - 8px);">
|
107 |
+
<source src="./assets/3_1epoch.wav" type="audio/wav">
|
108 |
+
</audio>
|
109 |
+
<audio controls style="width: calc(50% - 8px);">
|
110 |
+
<source src="./assets/4_1epoch.wav" type="audio/wav">
|
111 |
+
</audio>
|
112 |
+
</div>
|
113 |
+
</div>
|
114 |
+
|
115 |
+
|
116 |
+
<details open>
|
117 |
+
<summary>Showcase Model Details</summary>
|
118 |
+
<div>
|
119 |
+
<h3>Test Settings:</h3>
|
120 |
+
<ul>
|
121 |
+
<li>CFG: 7.0</li>
|
122 |
+
<li>Steps: 100</li>
|
123 |
+
<li>Seed: -1</li>
|
124 |
+
</ul>
|
125 |
+
<p>Prompts were chosen based on the top tagged words, except for the last prompt, which is used to compare the effect on non-trained tags.</p>
|
126 |
+
</div>
|
127 |
+
</details>
|
128 |
+
</details>
|
129 |
+
|
130 |
+
<details>
|
131 |
+
<summary>Training</summary>
|
132 |
+
|
133 |
+
### Dataset: music tracks of 2-3 min length
|
134 |
+
- All of my liked music ([download and auto-label script](https://pastebin.com/z1bkZyqe)), so mostly copyrighted material.
|
135 |
+
- Total number of samples: ~1383
|
136 |
+
- `"random_crop": true` in [dataset_config.json](https://github.com/Stability-AI/stable-audio-tools/issues/99#issuecomment-2174885688)
|
137 |
+
|
138 |
+
### Settings:
|
139 |
+
- Training epochs: 1
|
140 |
+
- Training steps: 1383
|
141 |
+
- Learning rate: 1e-05
|
142 |
+
|
143 |
+
</details>
|
assets/1_1epoch.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b363327a1a1d195f0c545be4632e7a71b9f5ceba3ce67b46823baa586d85d21
|
3 |
+
size 8388652
|
assets/1_Base_stable-audio-open-1.0.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad0b410916b4f1c6e1403072d4734248c4b37bb6d2b4cf11b76953cc67d97cfc
|
3 |
+
size 8388652
|
assets/2_1epoch.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec1d8e560ea527f0a7f00cf6797cc796c7257ed8fb019daf417730cc4ac098f0
|
3 |
+
size 8388652
|
assets/2_Base_stable-audio-open-1.0.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:429c32b94dde51d2bff7d5d559207f71c73f51543c263eb4eb180616dbfa34fe
|
3 |
+
size 8388652
|
assets/3_1epoch.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:086f24825f26390dafcdeed9b06f9811119f1db4007afb9f02e77ce9d0069c37
|
3 |
+
size 8388652
|
assets/3_Base_stable-audio-open-1.0.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b298af33a4e0dc09a14b9f66b9216d5329fd1a560805fd26e5777c4351d7296
|
3 |
+
size 8388652
|
assets/4_1epoch.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df16b39c52347022acee131b9575d8969af233cce1e1f232ace143532150809c
|
3 |
+
size 8388652
|
assets/4_Base_stable-audio-open-1.0.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9db9bd6619c56211e9abfb03ddeb70376e21a71e9bb175f79c04b291ca164ca
|
3 |
+
size 8388652
|
assets/music_3_illustration.jpg
ADDED
assets/music_4_illustration.jpg
ADDED
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd26da573977be3a5b3d96092869ad41ad70cd5ae68f89a6beded8eea8584fdb
|
3 |
+
size 4853889016
|