import gradio as gr
import note_seq
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
# Instrument list is imported but not currently used.
from constants import GM_INSTRUMENTS
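
# Overview: a hard-coded token prompt (with a user-chosen instrument, first
# note, and BPM) is fed to a causal language model; the generated tokens are
# decoded back into a note_seq NoteSequence and synthesized to audio.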

# Load the tokenizer and model for the current midi_model from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("Katpeeler/midi_model_3")
model = AutoModelForCausalLM.from_pretrained("Katpeeler/midi_model_3")

# Define note and bar lengths relative to 120 BPM.
# These are overridden if the user adjusts the BPM.
NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
BAR_LENGTH_120BPM = 4.0 * 60 / 120
# Sample rate should never change, and should be imported from constants.
# I will do this once I confirm I can't use a higher sample rate for playing back audio here.
SAMPLE_RATE = 44100

# Main method for converting tokens back into MIDI notes.
# An instrument_mapper can be supplied when more sounds are added.
# THIS METHOD IS FROM DR. TRISTAN BEHRENS (https://huggingface.co/TristanBehrens)
def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()
    note_sequence = empty_note_sequence()

    # Render all notes, tracking the playhead and the currently sounding notes.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0
    # Defaults so a malformed sequence (e.g. a note before BAR_START) can't
    # raise a NameError for current_time / current_notes.
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}
    for token_index, token in enumerate(token_sequence):

        if token == "PIECE_START":
            pass
        elif token == "PIECE_END":
            print("The end.")
            break
        elif token == "TRACK_START":
            current_bar_index = 0
            track_count += 1
            pass
        elif token == "TRACK_END":
            pass
        elif token == "KEYS_START":
            pass
        elif token == "KEYS_END":
            pass
        elif token.startswith("KEY="):
            pass
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument != "DRUMS" and use_program:
                if instrument_mapper is not None:
                    if instrument in instrument_mapper:
                        instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
            if instrument == "DRUMS" and use_drums:
                current_instrument = 0
                current_program = 0
                current_is_drum = True
        elif token == "BAR_START":
            current_time = current_bar_index * BAR_LENGTH_120BPM
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
            pass
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            note.end_time = current_time + 4 * NOTE_LENGTH_16TH_120BPM
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            if pitch in current_notes:
                note = current_notes[pitch]
                note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            delta = float(token.split("=")[-1]) * NOTE_LENGTH_16TH_120BPM
            current_time += delta
        elif token.startswith("DENSITY="):
            pass
        elif token == "[PAD]":
            pass
        else:
            print(f"Ignored token {token}.")

    # Re-index instruments so each unique (program, is_drum) pair gets its own track.
    instruments_drums = []
    for note in note_sequence.notes:
        pair = [note.program, note.is_drum]
        if pair not in instruments_drums:
            instruments_drums += [pair]
        note.instrument = instruments_drums.index(pair)

    if only_piano:
        for note in note_sequence.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0

    return note_sequence
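
# A minimal usage sketch (illustrative token string, not actual model output):
#   seq = token_sequence_to_note_sequence(
#       "PIECE_START TRACK_START INST=0 BAR_START NOTE_ON=60 "
#       "TIME_DELTA=4 NOTE_OFF=60 BAR_END TRACK_END PIECE_END")
#   note_seq.note_sequence_to_midi_file(seq, "sketch.mid")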

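# Helper: a fresh NoteSequence proto with a single tempo event and note_seq's
# standard ticks-per-quarter resolution.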
def empty_note_sequence(qpm=120.0, total_time=0.0):
    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()
    note_sequence.tempos.add().qpm = qpm
    note_sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    note_sequence.total_time = total_time
    return note_sequence

# The process that is called when the user clicks the "generate audio" button.
# Takes three numbers: two parts of the input prompt (instrument and first
# note) and the BPM.
def process(num1, num2, num3):
    # Prompt used to generate. I have this hard-coded currently to make generation smoother.
    # I include the start of the midi file, style and genre (since they are unused), start a track,
    # and allow the user to adjust the instrument number and the first note from the UI.
    created_text = f"""PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST={num1} BAR_START NOTE_ON={num2}"""
    
    # Adjust note and bar lengths for the requested BPM.
    # (The *_120BPM names are kept from the defaults above, but now hold
    # values for the user's tempo.)
    global NOTE_LENGTH_16TH_120BPM
    NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / num3
    global BAR_LENGTH_120BPM
    BAR_LENGTH_120BPM = 4.0 * 60 / num3
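    # e.g. at num3 = 90 BPM: a 16th note is 0.25 * 60 / 90 ≈ 0.167 s and a
    # 4/4 bar is 4.0 * 60 / 90 ≈ 2.667 s.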

    # Tokenize the input prompt and let the model generate a continuation
    input_ids = tokenizer.encode(created_text, return_tensors="pt")
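    # max_length=500 caps the total token count (prompt plus continuation);
    # larger values give longer clips at the cost of slower generation.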
    generated_ids = model.generate(input_ids, max_length=500)
    global generated_sequence
    generated_sequence = tokenizer.decode(generated_ids[0])

    # Convert the text of notes to audio
    note_sequence = token_sequence_to_note_sequence(generated_sequence)
    # note_seq's built-in synth renders the NoteSequence to a float waveform
    synth = note_seq.midi_synth.synthesize
    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
    # Plot of the sequence (show_figure=False, so nothing is displayed here)
    note_plot = note_seq.plot_sequence(note_sequence, False)
    # Normalize with ~sqrt(2) headroom, then scale to the int16 range
    array_of_floats /= 1.414
    array_of_floats *= 32767
    int16_data = array_of_floats.astype(np.int16)
    # Return the sample rate and array, as the gradio audio widget expects
    return SAMPLE_RATE, int16_data

# simple call to show the generated tokens.
# Default to an empty string so clicking "show generated tokens" before
# generating audio doesn't raise a NameError.
generated_sequence = ""

def generation():
    return generated_sequence

# Unused helper that once stored instant feedback from the gradio sliders.
# A simpler method replaced it, but it is kept in case it becomes useful later.
def identity(x, state):
    state += 1
    return x, state, state

# Gradio app structure
with gr.Blocks() as demo:
    # Title of the page
    gr.Markdown("Midi Generation")
    # The audio generation tab
    with gr.Tab("Audio generation"):
        # an audio widget
        audio_output = gr.Audio()
        # the slider widgets for the user to adjust the values for generation
        number1 = gr.Slider(1, 100, value=25, label="Inst number", step=1, info="Choose between 1 and 100")
        number2 = gr.Slider(1, 100, value=40, label="Note number", step=1, info="Choose between 1 and 100") 
        number3 = gr.Slider(60, 140, value=120, label="BPM", step=5, info="Choose between 60 and 140")
        # the button to send the prompt
        audio_button = gr.Button("generate audio")
    # the token generation tab
    with gr.Tab("Token generation"):
        # a text widget to display the generated tokens
        text_output = gr.Textbox()
        # the button to display the generated tokens
        text_button = gr.Button("show generated tokens")

    # The definitions for button clicks
    text_button.click(generation, inputs=None, outputs=text_output)
    audio_button.click(process, inputs=[number1, number2, number3], outputs=audio_output)

# runs the application
if __name__ == "__main__":
    demo.launch()