import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
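
# Each [title, path] entry below points at a folder of precomputed assets for that clip
# (waveform.png, waveform_video.mp4, fft.png, spectrogram.png, mel_spectrogram.png),
# which the tabs further down load by relative path.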
audios = [
    ["Book Example", "speaker"],
    ["Swoosh", "swoosh"],
    ["Knocking", "knocking"],
    ["Forest", "forest"],
    ["Evil Laugh", "evil-laugh"],
    ["Morning", "morning"],
    ["Cinematic", "cinematic"],
]

with gr.Blocks() as demo:
    with gr.Tab("Waveforms"):
        gr.Markdown("""## Waveforms
In this section, we'll look at the waveforms of several audio clips.
        """)
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=5):
                    waveform = gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=5):
                    video = gr.Video(value=f"{path}/waveform_video.mp4")
with gr.Tab("Understanding Frequencies"): | |
gr.Markdown("""## Understanding Frequencies | |
""") | |
freq = gr.Slider(0, 300, step=20, value=40, label="Frequency") | |
freq2 = gr.Slider(0, 30, step=5, value=0, label="Second Frequency") | |
amplitude = gr.Slider(0.05, 1, step=0.05, value=1, label="Amplitude") | |
audio = gr.Audio() | |
with gr.Row(): | |
plots = gr.Plot(label="Results") | |
with gr.Row(): | |
button = gr.Button(value="Create") | |
# https://github.com/gradio-app/gradio/issues/5469 | |
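        # plot_sine synthesizes one second of audio as the sum of two sines at the
        # chosen frequencies, and returns (a) a playable (sample_rate, samples) tuple
        # for gr.Audio and (b) a matplotlib figure with the waveform and its FFT
        # magnitude spectrum for gr.Plot.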
        def plot_sine(freq, freq2, a):
            sr = 44100               # samples per second
            ts = 1.0 / sr            # sampling interval
            t = np.arange(0, 1, ts)  # time vector: one second of samples

            data = a * np.sin(2 * np.pi * freq * t) + a * np.sin(2 * np.pi * freq2 * t)
            # Normalize to [-1, 1]
            data = data / np.max(np.abs(data))
            # Convert to 16-bit integer PCM
            data = (data * 32767).astype(np.int16)
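            # gr.Audio can play a (sample_rate, numpy_array) tuple directly,
            # so no intermediate .wav file is needed.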
            audio_data = (sr, data)

            fig, axes = plt.subplots(nrows=2, ncols=1, sharex=False)
            ax_waveform = axes[0]
            ax_spectrum = axes[1]

            # Time domain: plot the samples against time
            ax_waveform.plot(t, data)
            ax_waveform.set_xlabel('Time (s)')
            ax_waveform.set_ylabel('Amplitude')
            ax_waveform.set_title(f"Time domain of the signal (frequency {freq} Hz, amplitude {a})")
            X = np.fft.fft(data)
            N = len(X)
            n = np.arange(N)
            T = N / sr
            fft_freqs = n / T        # frequency (Hz) of each FFT bin
            ax_spectrum.set_xlim((0, 300))
            ax_spectrum.stem(fft_freqs, np.abs(X), 'r', markerfmt=" ", basefmt="-b")
            ax_spectrum.set_xlabel("Frequency (Hz)")
            ax_spectrum.set_title("Frequency domain of the signal")

            fig.tight_layout()
            fig.savefig('foo.png')
            return audio_data, fig
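
        # Clicking the button runs plot_sine with the three slider values and routes
        # its two return values into the Audio player and the Plot.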
        button.click(plot_sine, inputs=[freq, freq2, amplitude], outputs=[audio, plots])
with gr.Tab("Spectrograms and Mel Spectrograms"): | |
gr.Markdown("""## Waveforms | |
In this section, we'll look into the waveforms of multiple audios. | |
""") | |
        for title, path in audios:
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown(f"### {title}")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/waveform.png")
                with gr.Column(scale=10):
                    gr.Image(value=f"{path}/fft.png")
                with gr.Column(scale=10):
                    video = gr.Video(value=f"{path}/waveform_video.mp4")
            with gr.Row():
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/spectrogram.png")
                with gr.Column(scale=5):
                    gr.Image(value=f"{path}/mel_spectrogram.png")

if __name__ == '__main__':
    demo.launch(debug=True)