File size: 6,390 Bytes
35a52d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d337e0
 
 
 
35a52d1
 
 
 
 
1d337e0
35a52d1
 
 
 
 
1390245
2dbabd8
35a52d1
 
 
 
2dbabd8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35a52d1
 
 
2dbabd8
35a52d1
 
 
 
2dbabd8
35a52d1
 
 
 
2dbabd8
 
35a52d1
 
 
 
 
 
 
2dbabd8
35a52d1
2dbabd8
 
35a52d1
 
 
 
2dbabd8
35a52d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eed841c
 
35a52d1
eed841c
 
 
35a52d1
eed841c
 
35a52d1
eed841c
35a52d1
1390245
35a52d1
eed841c
35a52d1
 
 
 
 
 
 
 
 
 
2dbabd8
35a52d1
 
 
46f80fc
35a52d1
eed841c
35a52d1
eed841c
35a52d1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import os.path

import time as reqtime
import datetime
from pytz import timezone

import torch

import spaces
import gradio as gr

import random
import tqdm

from midi_to_colab_audio import midi_to_colab_audio
import TMIDIX

import matplotlib.pyplot as plt

from inference import PianoTranscription
from config import sample_rate
from utilities import load_audio

# True only when the SYSTEM environment variable is set to exactly "spaces".
in_space = os.environ.get("SYSTEM") == "spaces"
         
# =================================================================================================
                       
@spaces.GPU
def TranscribePianoAudio(input_audio):
    """Transcribe a solo piano audio file to MIDI and build preview outputs.

    Steps, in order:
      1. Load the audio as mono at ``sample_rate``.
      2. Run ``PianoTranscription`` (on the ``'cuda'`` device), handing it
         ``<input file name>.mid`` as the MIDI path.
      3. Read that MIDI back through TMIDIX and extract the enhanced score
         notes.
      4. Re-export the notes with ``Tegridy_ms_SONG_to_MIDI_Converter`` under
         the input's base name, then render ``<base name>.mid`` to audio.
      5. Plot the score.

    Reads the module-level names ``PDT`` (timezone used for log timestamps)
    and ``soundfont`` (SoundFont path), both of which are assigned in this
    file's ``__main__`` block.

    Args:
        input_audio: The uploaded audio file, either as a path string or as
            an object exposing the path through a ``.name`` attribute.

    Returns:
        A 5-tuple ``(title, summary, midi_path, audio, plot)``:
        the input's base name, ``str()`` of the first three score events,
        the output MIDI file name, a ``(16000, rendered_audio)`` pair, and
        the object returned by ``TMIDIX.plot_ms_SONG(..., return_plt=True)``.
    """
    print('=' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = reqtime.time()
    print('=' * 70)

    # NOTE(review): depending on the Gradio version / gr.File `type` setting
    # the handler may receive a plain path string instead of a file wrapper
    # with `.name` -- accept both rather than fail with AttributeError.
    f = input_audio if isinstance(input_audio, str) else input_audio.name
    fn = os.path.basename(f)
    # Base name = everything before the first dot of the file name.
    fn1 = fn.split('.')[0]

    print('-' * 70)
    print('Input file name:', fn)
    print('-' * 70)
    print('Loading audio...')

    # Load audio
    (audio, _) = load_audio(f, sr=sample_rate, mono=True)
    print('Done!')
    print('-' * 70)
    print('Loading transcriptor..')

    # Transcriptor
    transcriptor = PianoTranscription(device='cuda')    # 'cuda' | 'cpu'
    print('Done!')
    print('-' * 70)
    print('Transcribing...')

    # The returned value is not needed; the MIDI written to this path is
    # read back from disk right below.
    transcribed_midi = fn + '.mid'
    transcriptor.transcribe(audio, transcribed_midi)
    print('Done!')
    print('-' * 70)

    #===============================================================================
    raw_score = TMIDIX.midi2single_track_ms_score(transcribed_midi)

    #===============================================================================
    # Enhanced score notes

    escore = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]

    #==================================================================

    print('=' * 70)
    print('Number of transcribed notes:', len(escore))
    print('Sample trascribed MIDI events', escore[:5])
    print('=' * 70)
    print('Done!')
    print('=' * 70)

    #===============================================================================
    print('Rendering results...')

    patches = [0] * 16

    TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(escore,
                                             output_signature = 'ByteDance Solo Piano Audio to MIDI Transcription',
                                             output_file_name = fn1,
                                             track_name='Project Los Angeles',
                                             list_of_MIDI_patches=patches
                                             )
    print('=' * 70)
    # The converter is given the base name only; the file rendered and
    # returned below is `<base name>.mid`.
    new_fn = fn1+'.mid'

    # One rate for both the renderer and the (rate, samples) pair returned
    # to the audio component, so the two cannot drift apart.
    render_sample_rate = 16000

    rendered_audio = midi_to_colab_audio(new_fn,
                                         soundfont_path=soundfont,
                                         sample_rate=render_sample_rate,
                                         volume_scale=10,
                                         output_for_gradio=True
                                         )

    print('Done!')
    print('=' * 70)

    #========================================================

    output_midi_title = str(fn1)
    # Summary and plot are built from the same score that was written to
    # the output MIDI file.
    output_midi_summary = str(escore[:3])
    output_midi = str(new_fn)
    output_audio = (render_sample_rate, rendered_audio)

    output_plot = TMIDIX.plot_ms_SONG(escore, plot_title=output_midi, return_plt=True)

    print('Output MIDI file name:', output_midi)
    print('Output MIDI title:', output_midi_title)
    print('Output MIDI summary:', output_midi_summary)
    print('=' * 70)


    #========================================================

    print('-' * 70)
    print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('-' * 70)
    print('Req execution time:', (reqtime.time() - start_time), 'sec')

    return output_midi_title, output_midi_summary, output_midi, output_audio, output_plot

# =================================================================================================

if __name__ == "__main__":

    # Timezone for log timestamps; TranscribePianoAudio reads this global too.
    PDT = timezone('US/Pacific')

    banner = '=' * 70
    print(banner)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print(banner)

    # SoundFont file name; TranscribePianoAudio reads this global when rendering audio.
    soundfont = "SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2"

    # Introductory text shown under the two page headings.
    intro_markdown = (
        "![Visitors](https://api.visitorbadge.io/api/visitors?path=asigalov61.ByteDance-Solo-Piano-Adio-to-MIDI-Transcription&style=flat)\n\n"
        "This is a ByteDance Solo Piano Audio to MIDI Transcription Model\n\n"
        "Check out [ByteDance Solo Piano Audio to MIDI Transcription](https://github.com/asigalov61/piano_transcription_inference) on GitHub!\n\n"
        "[Open In Colab]"
        "(https://colab.research.google.com/github/asigalov61/tegridy-tools/blob/main/tegridy-tools/notebooks/ByteDance_Piano_Transcription.ipynb)"
        " for faster execution and endless transcription"
    )

    with gr.Blocks() as app:
        # --- Page header ---
        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>ByteDance Solo Piano Audio to MIDI Transcription</h1>")
        gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Transcribe any Solo Piano WAV or MP3 audio to MIDI</h1>")
        gr.Markdown(intro_markdown)

        # --- Input section ---
        gr.Markdown("## Upload your Solo Piano WAV or MP3 audio or select a sample example audio file")

        input_audio = gr.File(
            label="Input Solo Piano WAV or MP3 Audio File",
            file_types=[".wav", ".mp3"],
        )

        run_btn = gr.Button("transcribe", variant="primary")

        # --- Output section (components are created in display order) ---
        gr.Markdown("## Generation results")

        output_midi_title = gr.Textbox(label="Output MIDI title")
        output_midi_summary = gr.Textbox(label="Output MIDI summary")
        output_audio = gr.Audio(label="Output MIDI audio", format="wav", elem_id="midi_audio")
        output_plot = gr.Plot(label="Output MIDI score plot")
        output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])

        # Listed in the order of TranscribePianoAudio's return tuple, which
        # differs from the on-page order above.
        result_components = [
            output_midi_title,
            output_midi_summary,
            output_midi,
            output_audio,
            output_plot,
        ]

        run_event = run_btn.click(
            fn=TranscribePianoAudio,
            inputs=[input_audio],
            outputs=result_components,
        )

        gr.Examples(
            examples=["cut_liszt.mp3"],
            inputs=[input_audio],
            outputs=result_components,
            fn=TranscribePianoAudio,
            cache_examples=True,
        )

        app.queue().launch()