import gradio as gr
import note_seq
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Katpeeler/lmd_8bars_tokenizer")
model = AutoModelForCausalLM.from_pretrained("Katpeeler/midi_model_1")

# Timing constants at a fixed 120 BPM: a 16th note lasts 0.125 s and a 4/4 bar lasts 2.0 s.
NOTE_LENGTH_16TH_120BPM = 0.25 * 60 / 120
BAR_LENGTH_120BPM = 4.0 * 60 / 120
SAMPLE_RATE = 44100

def token_sequence_to_note_sequence(token_sequence, use_program=True, use_drums=True, instrument_mapper=None, only_piano=False):
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()
    note_sequence = empty_note_sequence()

    # Render all notes, walking the token stream left to right.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0
    # Defaults so malformed sequences (e.g. a NOTE_ON before any BAR_START)
    # do not raise a NameError.
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}
    for token in token_sequence:

        if token == "PIECE_START":
            pass
        elif token == "PIECE_END":
            print("The end.")
            break
        elif token == "TRACK_START":
            current_bar_index = 0
            track_count += 1
            pass
        elif token == "TRACK_END":
            pass
        elif token == "KEYS_START":
            pass
        elif token == "KEYS_END":
            pass
        elif token.startswith("KEY="):
            pass
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument != "DRUMS" and use_program:
                if instrument_mapper is not None:
                    if instrument in instrument_mapper:
                        instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
            if instrument == "DRUMS" and use_drums:
                current_instrument = 0
                current_program = 0
                current_is_drum = True
        elif token == "BAR_START":
            current_time = current_bar_index * BAR_LENGTH_120BPM
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
            pass
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            note.end_time = current_time + 4 * NOTE_LENGTH_16TH_120BPM
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            if pitch in current_notes:
                note = current_notes[pitch]
                note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            delta = float(token.split("=")[-1]) * NOTE_LENGTH_16TH_120BPM
            current_time += delta
        elif token.startswith("DENSITY="):
            pass
        elif token == "[PAD]":
            pass
        else:
            #print(f"Ignored token {token}.")
            pass

    # Assign one instrument index per unique (program, is_drum) pair.
    instruments_drums = []
    for note in note_sequence.notes:
        pair = [note.program, note.is_drum]
        if pair not in instruments_drums:
            instruments_drums += [pair]
        note.instrument = instruments_drums.index(pair)

    if only_piano:
        for note in note_sequence.notes:
            if not note.is_drum:
                note.instrument = 0
                note.program = 0

    return note_sequence
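
# Hedged sketch (not called by the app): a minimal, hand-written token string
# and the single note it decodes to. Real model output uses the same vocabulary
# handled above but is much longer.
def _example_token_conversion():
    tokens = (
        "PIECE_START TRACK_START INST=0 BAR_START "
        "NOTE_ON=60 TIME_DELTA=4 NOTE_OFF=60 BAR_END TRACK_END PIECE_END"
    )
    ns = token_sequence_to_note_sequence(tokens)
    # Expect one middle-C note from 0.0 s to 0.5 s (four 16ths at 120 BPM).
    for note in ns.notes:
        print(note.pitch, note.start_time, note.end_time)
    return ns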

def empty_note_sequence(qpm=120.0, total_time=0.0):
    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()
    note_sequence.tempos.add().qpm = qpm
    note_sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    note_sequence.total_time = total_time
    return note_sequence
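
# Optional sketch, not wired into the Gradio app: write a generated
# NoteSequence to a standard MIDI file. The default path is illustrative.
def save_note_sequence_as_midi(note_sequence, path="generated.mid"):
    # note_seq ships a converter from the NoteSequence proto to a .mid file.
    note_seq.sequence_proto_to_midi_file(note_sequence, path)
    return path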

def process(text):
    # Generate a token sequence from the text prompt.
    input_ids = tokenizer.encode(text, return_tensors="pt")
    generated_ids = model.generate(input_ids, max_length=500)
    generated_sequence = tokenizer.decode(generated_ids[0])

    # Convert the generated token text to a NoteSequence and synthesize audio.
    note_sequence = token_sequence_to_note_sequence(generated_sequence)
    synth = note_seq.midi_synth.synthesize
    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
    # The piano-roll plot is built here but not returned by the interface.
    note_plot = note_seq.plot_sequence(note_sequence, False)
    # Scale the float waveform into the int16 range expected by the audio
    # output, leaving roughly 3 dB of headroom to avoid clipping.
    array_of_floats /= 1.414
    array_of_floats *= 32767
    int16_data = array_of_floats.astype(np.int16)
    return SAMPLE_RATE, int16_data


def generation(text):
    # Same generation step as process(), but return the raw token text.
    input_ids = tokenizer.encode(text, return_tensors="pt")
    generated_ids = model.generate(input_ids, max_length=500)
    generated_sequence = tokenizer.decode(generated_ids[0])
    return generated_sequence
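
# Usage note: the token text returned by generation() can be auditioned by
# passing it through token_sequence_to_note_sequence() and
# note_seq.midi_synth.synthesize(), which is what process() does in one step.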

title = "Music gen with GPT-2"
title2 = "Midi token generation"

iface = gr.Interface(
  fn=process, 
  inputs=["text"],
  outputs=['audio'],
  title=title,
  #examples=[["PIECE_START"], ["PIECE_START STYLE=JSFAKES GENRE=JSFAKES TRACK_START INST=48 BAR_START NOTE_ON=61"]],
  article="This demo is inspired in the notebook from https://huggingface.co/TristanBehrens/js-fakes-4bars."
)

iface = gr.Interface(
    fn=generation
    inputs=["text"],
    outputs=["text"],
    title=title2,
    article="This will generate the encoded text that represents Midi language."
)

iface.launch(debug=True)