Spaces:

genaibook
/

audio_visualizations

Running

App Files Files Community

audio_visualizations / app.py

osanseviero

Upload app.py

507f557 verified about 2 months ago

raw

history blame contribute delete

4.03 kB

	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy.io.wavfile import write

	# Sample clips shown in the gallery tabs. Each entry is
	# [display title, asset folder]; the folder is used as a path prefix
	# for the pre-rendered images and videos (e.g. "<folder>/waveform.png").
	audios = [
	    [label, folder]
	    for label, folder in (
	        ("Book Example", "speaker"),
	        ("Swoosh", "swoosh"),
	        ("Knocking", "knocking"),
	        ("Forest", "forest"),
	        ("Evil Laugh", "evil-laugh"),
	        ("Morning", "morning"),
	        ("Cinematic", "cinematic"),
	    )
	]



	# Three-tab Gradio UI bound to `demo`:
	#   1. "Waveforms": pre-rendered waveform image + video per sample clip.
	#   2. "Understanding Frequencies": interactive sum of two sine waves.
	#   3. "Spectrograms and Mel Spectrograms": pre-rendered analysis images.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened -- confirm against the deployed app.
	with gr.Blocks() as demo:
	    with gr.Tab("Waveforms"):
	        gr.Markdown(
	            "## Waveforms\n\n"
	            "In this section, we'll look into the waveforms of multiple audios.\n"
	        )
	        for title, path in audios:
	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown(f"### {title}")
	                with gr.Column(scale=5):
	                    gr.Image(value=f"{path}/waveform.png")
	                with gr.Column(scale=5):
	                    gr.Video(value=f"{path}/waveform_video.mp4")

	    with gr.Tab("Understanding Frequencies"):
	        gr.Markdown("## Understanding Frequencies\n")
	        freq = gr.Slider(0, 300, step=20, value=40, label="Frequency")
	        freq2 = gr.Slider(0, 30, step=5, value=0, label="Second Frequency")
	        amplitude = gr.Slider(0.05, 1, step=0.05, value=1, label="Amplitude")

	        audio = gr.Audio()
	        with gr.Row():
	            plots = gr.Plot(label="Results")
	        with gr.Row():
	            button = gr.Button(value="Create")

	        # https://github.com/gradio-app/gradio/issues/5469
	        @gr.on(inputs=[freq, freq2, amplitude], outputs=[audio, plots])
	        def plot_sine(freq, freq2, a):
	            """Synthesize one second of two summed sine waves and plot it.

	            Args:
	                freq: Frequency in Hz of the first sine component.
	                freq2: Frequency in Hz of the second sine component.
	                a: Amplitude applied to both components; also the peak
	                    level (as a fraction of int16 full scale) of the result.

	            Returns:
	                A tuple ``((sample_rate, samples), figure)`` where
	                ``samples`` is an int16 NumPy array and ``figure`` is a
	                Matplotlib figure holding the time-domain plot (top) and
	                the 0-300 Hz magnitude spectrum (bottom).
	            """
	            sr = 44100  # samples per second
	            max_plot_hz = 300  # upper bound of the spectrum x-axis
	            t = np.arange(0, 1, 1.0 / sr)  # time vector covering one second
	            data = (
	                a * np.sin(2 * np.pi * freq * t)
	                + a * np.sin(2 * np.pi * freq2 * t)
	            )

	            # Rescale so the peak equals the requested amplitude. Dividing by
	            # the peak alone would cancel `a` and make the Amplitude slider a
	            # no-op. Skip the division for a silent signal (both frequencies
	            # at 0), which would otherwise be 0/0 and yield NaNs.
	            peak = np.max(np.abs(data))
	            if peak > 0:
	                data = a * data / peak

	            # Convert to 16-bit integer PCM
	            data = (data * 32767).astype(np.int16)
	            audio_data = (sr, data)

	            fig, (ax_waveform, ax_spectrum) = plt.subplots(
	                nrows=2, ncols=1, sharex=False
	            )

	            ax_waveform.plot(t, data)
	            ax_waveform.set_xlabel("Time (s)")
	            ax_waveform.set_ylabel("Amplitude")
	            ax_waveform.set_title("Time domain of the signal")

	            # The input is real, so the one-sided FFT carries all the
	            # information; only bins inside the visible range are drawn.
	            magnitudes = np.abs(np.fft.rfft(data))
	            bin_hz = np.fft.rfftfreq(len(data), d=1.0 / sr)
	            visible = bin_hz <= max_plot_hz
	            ax_spectrum.set_xlim((0, max_plot_hz))
	            ax_spectrum.stem(
	                bin_hz[visible],
	                magnitudes[visible],
	                linefmt="r",
	                markerfmt=" ",
	                basefmt="-b",
	            )
	            ax_spectrum.set_xlabel("Frequency (Hz)")
	            ax_spectrum.set_title("Frequency domain of the signal")

	            fig.tight_layout()
	            # Drop the figure from pyplot's registry so repeated callbacks do
	            # not accumulate open figures; the Figure object is still returned.
	            plt.close(fig)
	            return audio_data, fig

	        button.click(plot_sine, inputs=[freq, freq2, amplitude], outputs=[audio, plots])

	    with gr.Tab("Spectrograms and Mel Spectrograms"):
	        gr.Markdown(
	            "## Spectrograms and Mel Spectrograms\n\n"
	            "In this section, we'll look into the waveforms, frequency spectra, "
	            "spectrograms and mel spectrograms of multiple audios.\n"
	        )
	        for title, path in audios:
	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown(f"### {title}")
	                with gr.Column(scale=10):
	                    gr.Image(value=f"{path}/waveform.png")
	                with gr.Column(scale=10):
	                    gr.Image(value=f"{path}/fft.png")
	                with gr.Column(scale=10):
	                    gr.Video(value=f"{path}/waveform_video.mp4")
	            with gr.Row():
	                with gr.Column(scale=5):
	                    gr.Image(value=f"{path}/spectrogram.png")
	                with gr.Column(scale=5):
	                    gr.Image(value=f"{path}/mel_spectrogram.png")


	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch(debug=True)