# osanseviero's picture
# Upload app.py
# 507f557 verified
# raw
# history blame
# 4.03 kB
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from scipy.io.wavfile import write
# Catalogue of the audio examples shown in the gallery tabs.
# Each entry is [display title, asset directory]; the UI code builds paths
# such as "<directory>/waveform.png" from the second element.
audios = [
    ["Book Example", "speaker"],
    ["Swoosh", "swoosh"],
    ["Knocking", "knocking"],
    ["Forest", "forest"],
    ["Evil Laugh", "evil-laugh"],
    ["Morning", "morning"],
    ["Cinematic", "cinematic"],
]
with gr.Blocks() as demo:
    # Tab 1: static gallery, one row per entry of `audios`, showing the
    # pre-rendered waveform image next to its waveform video.
    with gr.Tab("Waveforms"):
        gr.Markdown(
            "## Waveforms\n"
            "In this section, we'll look into the waveforms of multiple audios.\n"
        )
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=5):
                    gr.Video(value=f"{path}/waveform_video.mp4")

    # Tab 2: interactive synthesis of one or two sine waves, with audio
    # playback and time/frequency-domain plots.
    with gr.Tab("Understanding Frequencies"):
        gr.Markdown("## Understanding Frequencies\n")
        freq = gr.Slider(0, 300, step=20, value=40, label="Frequency")
        freq2 = gr.Slider(0, 30, step=5, value=0, label="Second Frequency")
        amplitude = gr.Slider(0.05, 1, step=0.05, value=1, label="Amplitude")
        audio = gr.Audio()
        with gr.Row():
            plots = gr.Plot(label="Results")
        with gr.Row():
            button = gr.Button(value="Create")

        # https://github.com/gradio-app/gradio/issues/5469
        # No `triggers` are given to gr.on; per the Gradio documentation the
        # handler then runs on app load and whenever one of the inputs changes.
        @gr.on(inputs=[freq, freq2, amplitude], outputs=[audio, plots])
        def plot_sine(freq, freq2, a):
            """Synthesize one second of audio from two sine waves and plot it.

            Args:
                freq: Frequency of the first sine component, in Hz.
                freq2: Frequency of the second sine component, in Hz.
                a: Output amplitude as a fraction of 16-bit full scale;
                    values outside [0, 1] are clipped by the final clamp.

            Returns:
                A tuple ``(audio_data, fig)`` where ``audio_data`` is
                ``(sample_rate, samples)`` with ``samples`` an int16 NumPy
                array, and ``fig`` is a Matplotlib figure holding the
                time-domain plot (top) and the magnitude spectrum (bottom).
            """
            sr = 44100  # samples per second
            max_plot_hz = 300  # upper bound of the spectrum axis

            # Exactly `sr` samples covering [0, 1) s. A float step passed to
            # np.arange does not guarantee the element count, and the 1 Hz
            # bin spacing of the spectrum below relies on it.
            t = np.arange(sr) / sr

            # Mix the two unit sines and bring the mix to a peak of 1 before
            # applying the amplitude, so the Amplitude control is not
            # cancelled out by the normalisation.
            mix = np.sin(2 * np.pi * freq * t) + np.sin(2 * np.pi * freq2 * t)
            peak = np.max(np.abs(mix))
            if peak > 0:  # both frequencies at 0 Hz give silence; avoid 0/0
                mix = mix / peak
            scaled = np.clip(a * mix, -1.0, 1.0)

            # Convert to 16-bit integer PCM
            data = (scaled * 32767).astype(np.int16)
            audio_data = (sr, data)

            fig, (ax_waveform, ax_spectrum) = plt.subplots(
                nrows=2, ncols=1, sharex=False
            )

            ax_waveform.plot(t, data)
            ax_waveform.set_xlabel("Time (s)")
            ax_waveform.set_ylabel("Amplitude")
            ax_waveform.set_title("Time domain of the signal")

            # The signal is real, so the one-sided FFT carries all the
            # information. Only bins inside the visible axis range are drawn;
            # stemming every bin is wasted work for an axis clipped at
            # max_plot_hz.
            magnitudes = np.abs(np.fft.rfft(data))
            bin_freqs = np.fft.rfftfreq(len(data), d=1.0 / sr)
            visible = bin_freqs <= max_plot_hz
            ax_spectrum.set_xlim((0, max_plot_hz))
            ax_spectrum.stem(
                bin_freqs[visible],
                magnitudes[visible],
                linefmt="r",
                markerfmt=" ",
                basefmt="-b",
            )
            ax_spectrum.set_xlabel("Frequency (Hz)")
            ax_spectrum.set_title("Frequency domain of the signal")

            fig.tight_layout()
            # Deregister the figure from pyplot so figures do not pile up in
            # a long-running server; the Figure object itself stays usable
            # for rendering by the caller.
            plt.close(fig)
            return audio_data, fig

        button.click(
            plot_sine, inputs=[freq, freq2, amplitude], outputs=[audio, plots]
        )

    # Tab 3: static gallery, two rows per entry of `audios`: waveform, FFT
    # and video first, then spectrogram and mel spectrogram.
    with gr.Tab("Spectrograms and Mel Spectrograms"):
        gr.Markdown(
            "## Spectrograms and Mel Spectrograms\n"
            "In this section, we'll look into the waveforms, frequency spectra, "
            "spectrograms and mel spectrograms of multiple audios.\n"
        )
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/fft.png")
                with gr.Column(scale=10):
                    gr.Video(value=f"{path}/waveform_video.mp4")
            with gr.Row():
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/spectrogram.png")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/mel_spectrogram.png")
# Launch the app (with debug enabled) only when this file is run as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    demo.launch(debug=True)