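"""Gradio app: speech emotion recognition over time.

Splits an uploaded recording into 3-second chunks, extracts MFCC features
from each chunk, classifies each chunk with a pre-trained Keras model
(emotion.h5), and plots how the predicted emotion changes across the speech.
"""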
import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import plotly.express as px
import soundfile as sf
from tensorflow import keras
# Pre-trained emotion classifier; label order must match the model's output indices.
model = keras.models.load_model('emotion.h5')
labels = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Surprised']
def predict(audio):
    # Load the uploaded recording at its native sample rate.
    wave, sr = librosa.load(audio, sr=None)

    # Split the waveform into 3-second segments.
    segment_dur_secs = 3
    segment_length = sr * segment_dur_secs
    num_sections = int(np.ceil(len(wave) / segment_length))
    split = []
    paths = []
    for i in range(num_sections):
        t = wave[i * segment_length: (i + 1) * segment_length]
        split.append(t)

    # Write each segment to disk so it can be reloaded for feature extraction.
    out_dir = 'audio_data/splits/'
    os.makedirs(out_dir, exist_ok=True)
    recording_name = os.path.basename(audio[:-4])
    for i in range(num_sections):
        out_file = f"{recording_name}_{i}.wav"
        sf.write(os.path.join(out_dir, out_file), split[i], sr)
        paths.append(os.path.join(out_dir, out_file))
    # Extract features per segment: take 2.5 s starting at 0.5 s, resampled to
    # 44.1 kHz, compute 13 MFCCs, and average the coefficients in each frame.
    predicted_features = pd.DataFrame(columns=['features'])
    counter = 0
    for path in paths:
        X, sample_rate = librosa.load(path, duration=2.5, sr=44100, offset=0.5)
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13), axis=0)
        predicted_features.loc[counter] = [mfccs]
        counter += 1
    # Flatten the feature vectors into a matrix, drop rows with missing values
    # (e.g. a short final segment), and classify each segment.
    predicted_features = pd.DataFrame(predicted_features['features'].values.tolist())
    predicted_features.dropna(inplace=True)
    preds = model.predict(predicted_features)
    preds = preds.argmax(axis=1)

    # Map class indices to emotion names and plot them against segment index.
    df_preds = pd.DataFrame(preds, columns=['prediction'])
    df_preds['emotion'] = [labels[int(i)] for i in df_preds['prediction']]
    df_preds = df_preds.reset_index()
    fig = px.line(df_preds, x="index", y="emotion",
                  title='Predicted emotion per 3-second segment')
    return fig
outputs = gr.Plot()
title = "Emotion recognition"
description = "This model shows how the speaker's emotion changes over the course of the speech."

infr = gr.Interface(fn=predict,
                    inputs=gr.Audio(type="filepath"),
                    outputs=outputs,
                    examples=['audio_samples/1.mp3', 'audio_samples/2.mp3',
                              'audio_samples/3.mp3', 'audio_samples/4.mp3'],
                    cache_examples=True,
                    title=title,
                    description=description,
                    interpretation='default')
infr.launch()