import gradio as gr
import note_seq
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
# Instrument list is imported but not currently used.
from constants import GM_INSTRUMENTS
# Load the tokenizer and model for the current midi_model from the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained("Katpeeler/midi_model_3")
model = AutoModelForCausalLM.from_pretrained("Katpeeler/midi_model_3")
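# Both come from the same checkpoint, so the tokenizer's vocabulary matches the event
# tokens the model emits (PIECE_START, TRACK_START, NOTE_ON=..., TIME_DELTA=..., etc.).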
# Define note and bar length, relative to 120bpm.
# These are overridden if the user adjusts the bpm.
NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
BAR_LENGTH_120BPM = 4.0 * 60 / 120
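# For example, at the default 120bpm a 16th note lasts 0.25 * 60 / 120 = 0.125s
# and a four-beat bar lasts 4.0 * 60 / 120 = 2.0s.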
# Sample rate should never change, and should be imported from constants.
# I will do this once I confirm I can't use a higher sample rate for playing back audio here.
SAMPLE_RATE = 44100
# Main method for converting a token sequence back to midi notes.
# An instrument_mapper can be specified when ready to add more sounds.
# THIS METHOD IS FROM Dr. Tristan Behrens (https://huggingface.co/TristanBehrens)
def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()
    note_sequence = empty_note_sequence()
    # Render all notes.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0
    # Defensive defaults, in case a malformed sequence emits NOTE_ON or TIME_DELTA
    # before the first TRACK_START/BAR_START.
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}
    for token_index, token in enumerate(token_sequence):
        if token == "PIECE_START":
            pass
        elif token == "PIECE_END":
            print("The end.")
            break
        elif token == "TRACK_START":
            current_bar_index = 0
            track_count += 1
        elif token == "TRACK_END":
            pass
        elif token == "KEYS_START":
            pass
        elif token == "KEYS_END":
            pass
        elif token.startswith("KEY="):
            pass
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument != "DRUMS" and use_program:
                if instrument_mapper is not None:
                    if instrument in instrument_mapper:
                        instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
            if instrument == "DRUMS" and use_drums:
                current_instrument = 0
                current_program = 0
                current_is_drum = True
        elif token == "BAR_START":
            current_time = current_bar_index * BAR_LENGTH_120BPM
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            # Default to a one-beat note; a matching NOTE_OFF trims it below.
            note.end_time = current_time + 4 * NOTE_LENGTH_16TH_120BPM
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            if pitch in current_notes:
                note = current_notes[pitch]
                note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            delta = float(token.split("=")[-1]) * NOTE_LENGTH_16TH_120BPM
            current_time += delta
        elif token.startswith("DENSITY="):
            pass
        elif token == "[PAD]":
            pass
        else:
            print(f"Ignored token {token}.")
    # Renumber instruments so each (program, is_drum) pair maps to its own instrument.
    instruments_drums = []
    for note in note_sequence.notes:
        pair = [note.program, note.is_drum]
        if pair not in instruments_drums:
            instruments_drums += [pair]
        note.instrument = instruments_drums.index(pair)
    if only_piano:
        for note in note_sequence.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0
    return note_sequence
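# A minimal usage sketch (the token string is a hypothetical example, not real model output):
#   tokens = "PIECE_START TRACK_START INST=0 BAR_START NOTE_ON=60 TIME_DELTA=4 NOTE_OFF=60 BAR_END TRACK_END"
#   ns = token_sequence_to_note_sequence(tokens)
# At the default 120bpm this yields a single pitch-60 (middle C) note running from 0.0s
# to 4 * NOTE_LENGTH_16TH_120BPM = 0.5s.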
def empty_note_sequence(qpm=120.0, total_time=0.0):
    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()
    note_sequence.tempos.add().qpm = qpm
    note_sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    note_sequence.total_time = total_time
    return note_sequence
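# empty_note_sequence() hands the converter a blank NoteSequence proto carrying only a
# tempo event; token_sequence_to_note_sequence() appends notes to it as it walks the tokens.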
# The process that is called when the user clicks the "generate audio" button.
# Currently takes in 3 number arguments, corresponding to two parts of the input prompt
# and the bpm.
def process(num1, num2, num3):
    # Prompt used to generate. I have this hard-coded currently to make generation smoother.
    # I include the start of the midi file, style and genre (since they are unused), start a track,
    # and allow the user to adjust the instrument number and the first note from the UI.
    created_text = f"""PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST={num1} BAR_START NOTE_ON={num2}"""
    # Adjustments for bpm.
    global NOTE_LENGTH_16TH_120BPM
    NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / num3
    global BAR_LENGTH_120BPM
    BAR_LENGTH_120BPM = 4.0 * 60 / num3
    # Send the input prompt to the tokenizer, and generate.
    input_ids = tokenizer.encode(created_text, return_tensors="pt")
    generated_ids = model.generate(input_ids, max_length=500)
    global generated_sequence
    generated_sequence = tokenizer.decode(generated_ids[0])
    # Convert the text of notes to audio.
    note_sequence = token_sequence_to_note_sequence(generated_sequence)
    # The synth engine for playing sound.
    synth = note_seq.midi_synth.synthesize
    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
    note_plot = note_seq.plot_sequence(note_sequence, False)  # currently unused
    # Scale the float waveform into the 16-bit integer range.
    array_of_floats /= 1.414
    array_of_floats *= 32767
    int16_data = array_of_floats.astype(np.int16)
    # Return the sample rate and array, needed for the gradio audio widget.
    return SAMPLE_RATE, int16_data
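# For example, process(25, 40, 120) prompts the model with INST=25 and NOTE_ON=40 at
# 120bpm and returns the (sample_rate, int16 array) tuple the gradio audio widget expects.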
# Simple call to show the generated tokens (set globally by process above).
def generation():
    return generated_sequence
# Unused call that was used to store instant feedback of the gradio sliders.
# I ended up using a simpler method for them, but am keeping this in case it becomes useful later.
def identity(x, state):
    state += 1
    return x, state, state
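# If it were wired up again, it might look like this (hypothetical sketch; feedback_box
# and counter_box are assumed display widgets, and state is a gr.State(0)):
#   number1.change(identity, inputs=[number1, state], outputs=[feedback_box, state, counter_box])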
# Gradio app structure.
with gr.Blocks() as demo:
    # Title of the page.
    gr.Markdown("Midi Generation")
    # The audio generation tab.
    with gr.Tab("Audio generation"):
        # An audio widget.
        audio_output = gr.Audio()
        # The slider widgets for the user to adjust the values for generation.
        number1 = gr.Slider(1, 100, value=25, label="Inst number", step=1, info="Choose between 1 and 100")
        number2 = gr.Slider(1, 100, value=40, label="Note number", step=1, info="Choose between 1 and 100")
        number3 = gr.Slider(60, 140, value=120, label="BPM", step=5, info="Choose between 60 and 140")
        # The button to send the prompt.
        audio_button = gr.Button("generate audio")
    # The token generation tab.
    with gr.Tab("Token generation"):
        # A text widget to display the generated tokens.
        text_output = gr.Textbox()
        # The button to display the generated tokens.
        text_button = gr.Button("show generated tokens")
    # The definitions for button clicks.
    text_button.click(generation, inputs=None, outputs=text_output)
    audio_button.click(process, inputs=[number1, number2, number3], outputs=audio_output)
# Runs the application.
if __name__ == "__main__":
    demo.launch()
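# When run directly (python app.py), gradio serves the demo on its default local port, 7860.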