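"""Gradio app: speech emotion recognition over time.

Splits an uploaded recording into 3-second chunks, extracts MFCC features
from each chunk, classifies each chunk with a pre-trained Keras model
(emotion.h5), and plots how the predicted emotion changes across the speech.
"""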
import os

import gradio as gr
import librosa
import numpy as np
import pandas as pd
import plotly.express as px
import soundfile as sf
from tensorflow import keras
# Pre-trained emotion classifier; label order must match the model's output indices.
model = keras.models.load_model('emotion.h5')
labels = ['Angry', 'Disgusted', 'Fearful', 'Happy', 'Neutral', 'Sad', 'Surprised']
def predict(audio):
    # Load the uploaded recording at its native sample rate.
    wave, sr = librosa.load(audio, sr=None)

    # Split the waveform into 3-second segments.
    segment_dur_secs = 3
    segment_length = sr * segment_dur_secs
    num_sections = int(np.ceil(len(wave) / segment_length))
    split = []
    paths = []
    for i in range(num_sections):
        t = wave[i * segment_length: (i + 1) * segment_length]
        split.append(t)

    # Write each segment to disk so it can be reloaded for feature extraction.
    out_dir = 'audio_data/splits/'
    os.makedirs(out_dir, exist_ok=True)
    recording_name = os.path.basename(audio[:-4])
    for i in range(num_sections):
        out_file = f"{recording_name}_{i}.wav"
        sf.write(os.path.join(out_dir, out_file), split[i], sr)
        paths.append(os.path.join(out_dir, out_file))
    # Extract features per segment: take 2.5 s starting at 0.5 s, resampled to
    # 44.1 kHz, compute 13 MFCCs, and average the coefficients in each frame.
    predicted_features = pd.DataFrame(columns=['features'])
    counter = 0
    for path in paths:
        X, sample_rate = librosa.load(path, duration=2.5, sr=44100, offset=0.5)
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=13), axis=0)
        predicted_features.loc[counter] = [mfccs]
        counter += 1
    # Flatten the feature vectors into a matrix, drop rows with missing values
    # (e.g. a short final segment), and classify each segment.
    predicted_features = pd.DataFrame(predicted_features['features'].values.tolist())
    predicted_features.dropna(inplace=True)
    preds = model.predict(predicted_features)
    preds = preds.argmax(axis=1)

    # Map class indices to emotion names and plot them against segment index.
    df_preds = pd.DataFrame(preds, columns=['prediction'])
    df_preds['emotion'] = [labels[int(i)] for i in df_preds['prediction']]
    df_preds = df_preds.reset_index()
    fig = px.line(df_preds, x="index", y="emotion",
                  title='Predicted emotion per 3-second segment')
    return fig
outputs = gr.Plot()
title = "Emotion recognition"
description = "This model shows how the speaker's emotion changes over the course of the speech."

infr = gr.Interface(fn=predict,
                    inputs=gr.Audio(type="filepath"),
                    outputs=outputs,
                    examples=['audio_samples/1.mp3', 'audio_samples/2.mp3',
                              'audio_samples/3.mp3', 'audio_samples/4.mp3'],
                    cache_examples=True,
                    title=title,
                    description=description,
                    interpretation='default')
infr.launch()