Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results_organized/bleu/st.csv +8 -0
- results_organized/llama3_70b_judge/accent_recognition.csv +8 -0
- results_organized/llama3_70b_judge/audio_captioning.csv +8 -0
- results_organized/llama3_70b_judge/audio_scene_question_answering.csv +8 -0
- results_organized/llama3_70b_judge/emotion_recognition.csv +9 -1
- results_organized/llama3_70b_judge/gender_recognition.csv +9 -1
- results_organized/llama3_70b_judge/music_understanding.csv +9 -1
- results_organized/llama3_70b_judge/speech_instruction.csv +8 -0
- results_organized/llama3_70b_judge/sqa_english.csv +9 -1
- results_organized/llama3_70b_judge/sqa_singlish.csv +8 -0
- results_organized/meteor/audio_captioning.csv +8 -0
- results_organized/wer/asr_english.csv +8 -0
- results_organized/wer/asr_mandarin.csv +8 -0
- results_organized/wer/asr_singlish.csv +8 -0
results_organized/bleu/st.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,37.60224687716629,43.941098854450516,14.407399367512914,44.43289180618449,18.76473995941838,5.023057608950299
|
|
|
|
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
+
whisper_large_v3,,,,,,
|
3 |
+
gemini-1.5-flash,,,,,,
|
4 |
+
Qwen-Audio-Chat,4.102230932924371,15.330641138043728,,0.45648619714728844,9.898238298955656,0.01699144301093184
|
5 |
+
SALMONN_7B,14.102682915273142,33.88941292215531,0.00046745670226766583,26.89649039333571,5.296039450108202,0.3649023706010388
|
6 |
+
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
7 |
+
WavLLM_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,37.60224687716629,43.941098854450516,14.407399367512914,44.43289180618449,18.76473995941838,5.023057608950299
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,10.930203684508578,5.987143868370054,1.0368044741318085,46.79924664837527,14.154700735606419,2.4245628096245917
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,27.620150160643625,35.274306071307024,8.433062902024755,46.80524126004861,15.209998552437538,2.8327095799289337
|
results_organized/llama3_70b_judge/accent_recognition.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,voxceleb_accent_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,47.01682396389003
|
|
|
|
|
|
1 |
Model,voxceleb_accent_test
|
2 |
+
whisper_large_v3,
|
3 |
+
gemini-1.5-flash,
|
4 |
+
Qwen-Audio-Chat,48.05088223225277
|
5 |
+
SALMONN_7B,34.222404595814524
|
6 |
+
Qwen2-Audio-7B-Instruct,29.187525646286417
|
7 |
+
WavLLM_fairseq,
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,47.01682396389003
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,39.32704144439885
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,24.640951990151827
|
results_organized/llama3_70b_judge/audio_captioning.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,38.00454545454545,33.97687861271676
|
|
|
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
+
whisper_large_v3,,
|
3 |
+
gemini-1.5-flash,,
|
4 |
+
Qwen-Audio-Chat,47.04090909090909,32.9364161849711
|
5 |
+
SALMONN_7B,37.445454545454545,23.76878612716763
|
6 |
+
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
7 |
+
WavLLM_fairseq,5.5,6.901734104046243
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,38.00454545454545,33.97687861271676
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,2.4727272727272727,3.445086705202312
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,3.0954545454545457,6.3468208092485545
|
results_organized/llama3_70b_judge/audio_scene_question_answering.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,63.15021876519203,49.77635782747604,46.31578947368421
|
|
|
|
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
+
whisper_large_v3,,,
|
3 |
+
gemini-1.5-flash,,,
|
4 |
+
Qwen-Audio-Chat,61.934856587263,50.22364217252396,42.69736842105263
|
5 |
+
SALMONN_7B,57.75401069518716,50.287539936102235,47.30263157894737
|
6 |
+
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
7 |
+
WavLLM_fairseq,43.01199466903598,29.840255591054312,26.25
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,63.15021876519203,49.77635782747604,46.31578947368421
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,29.47134606841404,17.380191693290733,16.710526315789473
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,24.647544968400585,18.466453674121407,18.88157894736842
|
results_organized/llama3_70b_judge/emotion_recognition.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
+
whisper_large_v3,,,
|
3 |
+
gemini-1.5-flash,,,
|
4 |
+
Qwen-Audio-Chat,,,
|
5 |
+
SALMONN_7B,,,
|
6 |
+
Qwen2-Audio-7B-Instruct,,,
|
7 |
+
WavLLM_fairseq,,,
|
8 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,48.505976095617534,46.206896551724135,36.36015325670498
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,,,
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,44.322709163346616,,
|
results_organized/llama3_70b_judge/gender_recognition.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
+
whisper_large_v3,,
|
3 |
+
gemini-1.5-flash,,
|
4 |
+
Qwen-Audio-Chat,,
|
5 |
+
SALMONN_7B,,
|
6 |
+
Qwen2-Audio-7B-Instruct,,
|
7 |
+
WavLLM_fairseq,,
|
8 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,99.75379565038982,93.48605577689243
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,,
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,,
|
results_organized/llama3_70b_judge/music_understanding.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,muchomusic_test
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,muchomusic_test
|
2 |
+
whisper_large_v3,
|
3 |
+
gemini-1.5-flash,
|
4 |
+
Qwen-Audio-Chat,
|
5 |
+
SALMONN_7B,
|
6 |
+
Qwen2-Audio-7B-Instruct,
|
7 |
+
WavLLM_fairseq,
|
8 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,57.7927548441449
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,
|
results_organized/llama3_70b_judge/speech_instruction.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,65.6,74.80000000000001
|
|
|
|
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
+
whisper_large_v3,,
|
3 |
+
gemini-1.5-flash,,
|
4 |
+
Qwen-Audio-Chat,10.600000000000001,9.8
|
5 |
+
SALMONN_7B,15.8,17.2
|
6 |
+
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
7 |
+
WavLLM_fairseq,19.2,21.6
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,65.6,74.80000000000001
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,63.0,70.8
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,72.2,73.8
|
results_organized/llama3_70b_judge/sqa_english.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
+
whisper_large_v3,,,,,
|
3 |
+
gemini-1.5-flash,,,,,
|
4 |
+
Qwen-Audio-Chat,79.36274509803921,63.16860465116279,64.8327415436367,,
|
5 |
+
SALMONN_7B,83.48039215686273,59.24418604651163,66.39506634273968,,
|
6 |
+
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,64.86264249672958,,
|
7 |
+
WavLLM_fairseq,83.92156862745098,58.54651162790698,77.64903756307233,,
|
8 |
+
MERaLiON-AudioLLM-Whisper-SEA-LION,86.76470588235293,59.7093023255814,73.66473556344609,88.50726552179657,84.31782540512285
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,82.99019607843137,64.94186046511628,83.81984675761541,,
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,88.57843137254902,73.11046511627907,88.61894972902262,91.85380889476001,89.33612127548353
|
results_organized/llama3_70b_judge/sqa_singlish.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,imda_part3_30s_sqa_human_test,imda_part4_30s_sqa_human_test,imda_part5_30s_sqa_human_test,imda_part6_30s_sqa_human_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,51.4,53.2,64.80000000000001,67.2
|
|
|
|
|
|
1 |
Model,imda_part3_30s_sqa_human_test,imda_part4_30s_sqa_human_test,imda_part5_30s_sqa_human_test,imda_part6_30s_sqa_human_test
|
2 |
+
whisper_large_v3,,,,
|
3 |
+
gemini-1.5-flash,,,,
|
4 |
+
Qwen-Audio-Chat,32.2,37.8,47.800000000000004,51.4
|
5 |
+
SALMONN_7B,40.599999999999994,36.6,44.6,46.8
|
6 |
+
Qwen2-Audio-7B-Instruct,42.0,39.6,51.6,53.6
|
7 |
+
WavLLM_fairseq,45.199999999999996,46.6,50.8,62.199999999999996
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,51.4,53.2,64.80000000000001,67.2
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,49.0,53.8,57.800000000000004,64.0
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,56.0,66.0,74.0,71.6
|
results_organized/meteor/audio_captioning.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.24920047034353812,0.3175511907248581
|
|
|
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
+
whisper_large_v3,,
|
3 |
+
gemini-1.5-flash,,
|
4 |
+
Qwen-Audio-Chat,0.27553015076950976,0.2355106805560457
|
5 |
+
SALMONN_7B,0.20994052484339956,0.17175112770658157
|
6 |
+
Qwen2-Audio-7B-Instruct,0.19891712076314283,0.21342294856199182
|
7 |
+
WavLLM_fairseq,0.041732965094428545,0.06399522524688675
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.24920047034353812,0.3175511907248581
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.07953048457785493,0.1388630786594543
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.05796819723943051,0.120421856260385
|
results_organized/wer/asr_english.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.022918474365262006,0.041576030415949455,0.07811646454714301,0.21050407754683692,0.14457154747310655,0.13488732754499672,0.1652245056860175,0.07884745040985061,0.10228682857649353
|
|
|
|
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test
|
2 |
+
whisper_large_v3,0.01878749009695552,0.03660128246354058,0.10001863741235596,0.14602420615337386,0.09459022434812692,0.11863959266711877,0.15887899737116104,0.037649480146197796,0.03208650948413402
|
3 |
+
gemini-1.5-flash,,,,,,,,,
|
4 |
+
Qwen-Audio-Chat,0.020258799562379748,0.043467569561352074,0.11272421128398918,0.31419144746723354,0.13018910022587737,0.2655529121410546,0.3664994875132684,0.04052375714133636,0.2911540507002305
|
5 |
+
SALMONN_7B,0.10270871845172973,0.09671439650443565,0.3062255383962828,0.23699946689025367,0.10765150204693537,0.2577708974886327,0.3597423676988383,0.0459884319222171,0.14231519234178336
|
6 |
+
Qwen2-Audio-7B-Instruct,0.035141660693401744,0.060415760304159495,0.11438872500819404,0.2165498391593041,0.11723812890302816,0.18872219319407232,0.23542555661330924,0.06114048472375004,0.08739585179932637
|
7 |
+
WavLLM_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.022918474365262006,0.041576030415949455,0.07811646454714301,0.21050407754683692,0.14457154747310655,0.13488732754499672,0.1652245056860175,0.07884745040985061,0.10228682857649353
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.018334779492209605,0.03714982881570734,0.09876543209876543,0.14540692118393275,0.09515429104337297,0.11773910240019567,0.15611126487402763,0.038146268762641496,0.04754476156709803
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.032349945297468596,0.05307658841999735,0.10600831614192711,0.20140159998943682,0.09948381629977261,0.11416493424197618,0.1448629161356777,0.04900464852205386,0.04396383619925545
|
results_organized/wer/asr_mandarin.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,aishell_asr_zh_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.13165449110094832
|
|
|
|
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
+
whisper_large_v3,0.12359684029221357
|
3 |
+
gemini-1.5-flash,
|
4 |
+
Qwen-Audio-Chat,0.9469917443725129
|
5 |
+
SALMONN_7B,0.8259290055631446
|
6 |
+
Qwen2-Audio-7B-Instruct,0.09260359129694522
|
7 |
+
WavLLM_fairseq,0.7054601967888183
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.13165449110094832
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.12450753301261111
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.20886539565639167
|
results_organized/wer/asr_singlish.csv
CHANGED
@@ -1,2 +1,10 @@
|
|
1 |
Model,imda_part1_asr_test,imda_part2_asr_test,imda_part3_30s_asr_test,imda_part4_30s_asr_test,imda_part5_30s_asr_test,imda_part6_30s_asr_test
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.042254894789457,0.048088629169710254,0.2919053954978684,0.36589168730903193,0.17694182194919086,0.11292172031202054
|
|
|
|
|
|
1 |
Model,imda_part1_asr_test,imda_part2_asr_test,imda_part3_30s_asr_test,imda_part4_30s_asr_test,imda_part5_30s_asr_test,imda_part6_30s_asr_test
|
2 |
+
whisper_large_v3,,,,,,
|
3 |
+
gemini-1.5-flash,,,,,,
|
4 |
+
Qwen-Audio-Chat,0.10550313315290274,0.45479263046830615,0.6412550574306894,1.173131813552289,0.3016882870525747,0.31394240863063033
|
5 |
+
SALMONN_7B,0.0925804013361617,0.42346400454508565,0.6569229098215983,0.7593582215292535,0.34868891450584405,0.24872817713464365
|
6 |
+
Qwen2-Audio-7B-Instruct,0.07197717796796138,0.1905689473257041,0.35076166942732234,0.5613424034000176,0.27856006770658537,0.2245352799625317
|
7 |
+
WavLLM_fairseq,0.10077292565771828,0.4463923382842302,0.7540934640345399,1.143645714142011,0.39796588405247263,0.42541061709652933
|
8 |
MERaLiON-AudioLLM-Whisper-SEA-LION,0.042254894789457,0.048088629169710254,0.2919053954978684,0.36589168730903193,0.17694182194919086,0.11292172031202054
|
9 |
+
cascade_whisper_large_v3_llama_3_8b_instruct,0.06922195401458074,0.31912994075156237,0.29992939962527493,0.4750971343786543,0.22004640235805695,0.17467982364056267
|
10 |
+
cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct,0.07041669714480775,0.32988393799204613,0.3035544573275043,0.4779640131272869,0.22881615619208825,0.1789273082575623
|