osanseviero's picture
Upload app.py
507f557 verified
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
# Sample clips shown in the demo, as [display title, asset directory] pairs.
# The tabs below load pre-rendered files from each directory by name
# (waveform.png, waveform_video.mp4, fft.png, spectrogram.png,
# mel_spectrogram.png).
audios = [
    ["Book Example", "speaker"],
    ["Swoosh", "swoosh"],
    ["Knocking", "knocking"],
    ["Forest", "forest"],
    ["Evil Laugh", "evil-laugh"],
    ["Morning", "morning"],
    ["Cinematic", "cinematic"],
]
# Three-tab demo UI. The layout is declared by nesting gradio context
# managers (Blocks > Tab > Row > Column), so the indentation below mirrors
# the on-page structure.
with gr.Blocks() as demo:
    with gr.Tab("Waveforms"):
        gr.Markdown(
            "## Waveforms\n"
            "In this section, we'll look into the waveforms of multiple audios.\n"
        )
        # One row per sample: title, static waveform image, waveform video.
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=5):
                    gr.Video(value=f"{path}/waveform_video.mp4")

    with gr.Tab("Understanding Frequencies"):
        gr.Markdown("## Understanding Frequencies\n")
        freq = gr.Slider(0, 300, step=20, value=40, label="Frequency")
        freq2 = gr.Slider(0, 30, step=5, value=0, label="Second Frequency")
        amplitude = gr.Slider(0.05, 1, step=0.05, value=1, label="Amplitude")
        audio = gr.Audio()
        with gr.Row():
            plots = gr.Plot(label="Results")
        with gr.Row():
            button = gr.Button(value="Create")

        # https://github.com/gradio-app/gradio/issues/5469
        # No explicit triggers are given: per the Gradio docs, gr.on then runs
        # the handler when any of the listed inputs changes.
        @gr.on(inputs=[freq, freq2, amplitude], outputs=[audio, plots])
        def plot_sine(freq, freq2, a):
            """Synthesize one second of a two-tone signal and plot it.

            Args:
                freq: Frequency of the first sine, in Hz.
                freq2: Frequency of the second sine, in Hz (0 contributes
                    nothing to the mix).
                a: Peak amplitude of the output as a fraction of int16 full
                    scale (the slider offers 0.05 to 1).

            Returns:
                A pair ``((sample_rate, samples), figure)``: 16-bit PCM
                samples for ``gr.Audio`` and a matplotlib figure for
                ``gr.Plot`` with the waveform on top and the magnitude
                spectrum from 0 to 300 Hz below.
            """
            sr = 44100  # samples per second
            max_plot_hz = 300  # right edge of the spectrum plot
            # Integer sample indices guarantee exactly ``sr`` samples; a float
            # step in np.arange can round to one sample more or less.
            t = np.arange(sr) / sr

            mix = np.sin(2 * np.pi * freq * t) + np.sin(2 * np.pi * freq2 * t)
            # Normalize the mix to [-1, 1] first, then apply the requested
            # amplitude. Skipping the division for an all-zero mix (both
            # frequencies at 0) keeps the output silent instead of NaN.
            peak = np.max(np.abs(mix))
            if peak > 0:
                mix = mix / peak
            # Convert to 16-bit integer PCM
            data = (a * mix * 32767).astype(np.int16)
            audio_data = (sr, data)

            fig, (ax_waveform, ax_spectrum) = plt.subplots(
                nrows=2, ncols=1, sharex=False
            )
            ax_waveform.plot(t, data)
            ax_waveform.set_xlabel("Time (s)")
            ax_waveform.set_ylabel("Amplitude")
            ax_waveform.set_title("Time domain of the signal")

            # The signal is real, so the one-sided FFT carries all the
            # information; only the bins inside the visible x range are drawn.
            magnitudes = np.abs(np.fft.rfft(data))
            freqs = np.fft.rfftfreq(len(data), d=1 / sr)
            visible = freqs <= max_plot_hz
            ax_spectrum.set_xlim((0, max_plot_hz))
            ax_spectrum.stem(
                freqs[visible],
                magnitudes[visible],
                linefmt="r",
                markerfmt=" ",
                basefmt="-b",
            )
            ax_spectrum.set_xlabel("Frequency (Hz)")
            ax_spectrum.set_title("Frequency domain of the signal")
            fig.tight_layout()

            # Drop the figure from pyplot's global registry so repeated calls
            # do not pile up open figures; the Figure object is still
            # renderable by the caller.
            plt.close(fig)
            return audio_data, fig

        button.click(
            plot_sine, inputs=[freq, freq2, amplitude], outputs=[audio, plots]
        )

    with gr.Tab("Spectrograms and Mel Spectrograms"):
        gr.Markdown(
            "## Spectrograms and Mel Spectrograms\n"
            "In this section, we'll look into the spectrograms and mel "
            "spectrograms of multiple audios.\n"
        )
        # Two rows per sample: waveform / FFT / video, then the spectrogram
        # next to its mel-scaled counterpart.
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/fft.png")
                with gr.Column(scale=10):
                    gr.Video(value=f"{path}/waveform_video.mp4")
            with gr.Row():
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/spectrogram.png")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/mel_spectrogram.png")
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)