zhuohan-7 committed on
Commit
f8aba47
·
verified ·
1 Parent(s): 21f995e

Upload folder using huggingface_hub

Browse files
examples/AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42f33a60a23ffc8fce090307d530ef03b2b8cf9852fa70418e76ed6a1d5dd978
3
+ size 954480
examples/AQA/AudioCaps-QA-Test/dataset_info.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "audio": {
17
+ "dtype": "null",
18
+ "_type": "Value"
19
+ },
20
+ "text": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ }
24
+ },
25
+ "answer": {
26
+ "audio": {
27
+ "dtype": "null",
28
+ "_type": "Value"
29
+ },
30
+ "text": {
31
+ "dtype": "string",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "other_attributes": {
36
+ "audiocap_id": {
37
+ "dtype": "string",
38
+ "_type": "Value"
39
+ },
40
+ "caption": {
41
+ "dtype": "string",
42
+ "_type": "Value"
43
+ },
44
+ "start_time": {
45
+ "dtype": "string",
46
+ "_type": "Value"
47
+ },
48
+ "youtube_id": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ }
52
+ },
53
+ "salmonn_7b": {
54
+ "answer": {
55
+ "dtype": "string",
56
+ "_type": "Value"
57
+ },
58
+ "model_prediction": {
59
+ "dtype": "string",
60
+ "_type": "Value"
61
+ },
62
+ "task_type": {
63
+ "dtype": "string",
64
+ "_type": "Value"
65
+ },
66
+ "text": {
67
+ "dtype": "string",
68
+ "_type": "Value"
69
+ }
70
+ },
71
+ "wavllm_fairseq": {
72
+ "answer": {
73
+ "dtype": "string",
74
+ "_type": "Value"
75
+ },
76
+ "model_prediction": {
77
+ "dtype": "string",
78
+ "_type": "Value"
79
+ },
80
+ "task_type": {
81
+ "dtype": "string",
82
+ "_type": "Value"
83
+ },
84
+ "text": {
85
+ "dtype": "string",
86
+ "_type": "Value"
87
+ }
88
+ },
89
+ "Qwen2-Audio-7B-Instruct": {
90
+ "answer": {
91
+ "dtype": "string",
92
+ "_type": "Value"
93
+ },
94
+ "model_prediction": {
95
+ "dtype": "string",
96
+ "_type": "Value"
97
+ },
98
+ "task_type": {
99
+ "dtype": "string",
100
+ "_type": "Value"
101
+ },
102
+ "text": {
103
+ "dtype": "string",
104
+ "_type": "Value"
105
+ }
106
+ },
107
+ "whisper_large_v3_with_llama_3_8b_instruct": {
108
+ "answer": {
109
+ "dtype": "string",
110
+ "_type": "Value"
111
+ },
112
+ "model_prediction": {
113
+ "dtype": "string",
114
+ "_type": "Value"
115
+ },
116
+ "task_type": {
117
+ "dtype": "string",
118
+ "_type": "Value"
119
+ },
120
+ "text": {
121
+ "dtype": "string",
122
+ "_type": "Value"
123
+ }
124
+ },
125
+ "qwen_audio_chat": {
126
+ "answer": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "model_prediction": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "task_type": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ },
138
+ "text": {
139
+ "dtype": "string",
140
+ "_type": "Value"
141
+ }
142
+ }
143
+ },
144
+ "homepage": "",
145
+ "license": ""
146
+ }
examples/AQA/AudioCaps-QA-Test/sample_0.wav ADDED
Binary file (320 kB). View file
 
examples/AQA/AudioCaps-QA-Test/sample_1.wav ADDED
Binary file (304 kB). View file
 
examples/AQA/AudioCaps-QA-Test/sample_2.wav ADDED
Binary file (320 kB). View file
 
examples/AQA/AudioCaps-QA-Test/state.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "b4d0bc420173574a",
8
+ "_format_columns": [
9
+ "context",
10
+ "instruction",
11
+ "answer",
12
+ "other_attributes",
13
+ "salmonn_7b",
14
+ "wavllm_fairseq",
15
+ "Qwen2-Audio-7B-Instruct",
16
+ "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "qwen_audio_chat"
18
+ ],
19
+ "_format_kwargs": {},
20
+ "_format_type": null,
21
+ "_output_all_columns": false,
22
+ "_split": null
23
+ }
examples/AQA/Clotho-AQA-Test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76c13e88e89bb39d6a4dedc9f3bb30e128cdaaa5f68d8c0e1d804d9af5cbf68
3
+ size 2181384
examples/AQA/Clotho-AQA-Test/dataset_info.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "audio": {
17
+ "dtype": "null",
18
+ "_type": "Value"
19
+ },
20
+ "text": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ }
24
+ },
25
+ "answer": {
26
+ "audio": {
27
+ "dtype": "null",
28
+ "_type": "Value"
29
+ },
30
+ "text": {
31
+ "dtype": "string",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "other_attributes": {},
36
+ "salmonn_7b": {
37
+ "answer": {
38
+ "dtype": "string",
39
+ "_type": "Value"
40
+ },
41
+ "model_prediction": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ },
45
+ "task_type": {
46
+ "dtype": "string",
47
+ "_type": "Value"
48
+ },
49
+ "text": {
50
+ "dtype": "string",
51
+ "_type": "Value"
52
+ }
53
+ },
54
+ "wavllm_fairseq": {
55
+ "answer": {
56
+ "dtype": "string",
57
+ "_type": "Value"
58
+ },
59
+ "model_prediction": {
60
+ "dtype": "string",
61
+ "_type": "Value"
62
+ },
63
+ "task_type": {
64
+ "dtype": "string",
65
+ "_type": "Value"
66
+ },
67
+ "text": {
68
+ "dtype": "string",
69
+ "_type": "Value"
70
+ }
71
+ },
72
+ "Qwen2-Audio-7B-Instruct": {
73
+ "answer": {
74
+ "dtype": "string",
75
+ "_type": "Value"
76
+ },
77
+ "model_prediction": {
78
+ "dtype": "string",
79
+ "_type": "Value"
80
+ },
81
+ "task_type": {
82
+ "dtype": "string",
83
+ "_type": "Value"
84
+ },
85
+ "text": {
86
+ "dtype": "string",
87
+ "_type": "Value"
88
+ }
89
+ },
90
+ "whisper_large_v3_with_llama_3_8b_instruct": {
91
+ "answer": {
92
+ "dtype": "string",
93
+ "_type": "Value"
94
+ },
95
+ "model_prediction": {
96
+ "dtype": "string",
97
+ "_type": "Value"
98
+ },
99
+ "task_type": {
100
+ "dtype": "string",
101
+ "_type": "Value"
102
+ },
103
+ "text": {
104
+ "dtype": "string",
105
+ "_type": "Value"
106
+ }
107
+ },
108
+ "qwen_audio_chat": {
109
+ "answer": {
110
+ "dtype": "string",
111
+ "_type": "Value"
112
+ },
113
+ "model_prediction": {
114
+ "dtype": "string",
115
+ "_type": "Value"
116
+ },
117
+ "task_type": {
118
+ "dtype": "string",
119
+ "_type": "Value"
120
+ },
121
+ "text": {
122
+ "dtype": "string",
123
+ "_type": "Value"
124
+ }
125
+ }
126
+ },
127
+ "homepage": "",
128
+ "license": ""
129
+ }
examples/AQA/Clotho-AQA-Test/sample_0.wav ADDED
Binary file (868 kB). View file
 
examples/AQA/Clotho-AQA-Test/sample_1.wav ADDED
Binary file (668 kB). View file
 
examples/AQA/Clotho-AQA-Test/sample_2.wav ADDED
Binary file (636 kB). View file
 
examples/AQA/Clotho-AQA-Test/state.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "3f05c76553bf311d",
8
+ "_format_columns": [
9
+ "context",
10
+ "instruction",
11
+ "answer",
12
+ "other_attributes",
13
+ "salmonn_7b",
14
+ "wavllm_fairseq",
15
+ "Qwen2-Audio-7B-Instruct",
16
+ "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "qwen_audio_chat"
18
+ ],
19
+ "_format_kwargs": {},
20
+ "_format_type": null,
21
+ "_output_all_columns": false,
22
+ "_split": null
23
+ }
examples/AQA/WavCaps-QA-Test/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183517b37463c54c088f5e1011721003d14587380bc111e8ad7f7cfc60fcd9e5
3
+ size 970376
examples/AQA/WavCaps-QA-Test/dataset_info.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "audio": {
17
+ "dtype": "null",
18
+ "_type": "Value"
19
+ },
20
+ "text": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ }
24
+ },
25
+ "answer": {
26
+ "audio": {
27
+ "dtype": "null",
28
+ "_type": "Value"
29
+ },
30
+ "text": {
31
+ "dtype": "string",
32
+ "_type": "Value"
33
+ }
34
+ },
35
+ "other_attributes": {
36
+ "audio_path": {
37
+ "dtype": "string",
38
+ "_type": "Value"
39
+ },
40
+ "caption": {
41
+ "dtype": "string",
42
+ "_type": "Value"
43
+ },
44
+ "duration": {
45
+ "dtype": "string",
46
+ "_type": "Value"
47
+ }
48
+ },
49
+ "salmonn_7b": {
50
+ "answer": {
51
+ "dtype": "string",
52
+ "_type": "Value"
53
+ },
54
+ "model_prediction": {
55
+ "dtype": "string",
56
+ "_type": "Value"
57
+ },
58
+ "task_type": {
59
+ "dtype": "string",
60
+ "_type": "Value"
61
+ },
62
+ "text": {
63
+ "dtype": "string",
64
+ "_type": "Value"
65
+ }
66
+ },
67
+ "wavllm_fairseq": {
68
+ "answer": {
69
+ "dtype": "string",
70
+ "_type": "Value"
71
+ },
72
+ "model_prediction": {
73
+ "dtype": "string",
74
+ "_type": "Value"
75
+ },
76
+ "task_type": {
77
+ "dtype": "string",
78
+ "_type": "Value"
79
+ },
80
+ "text": {
81
+ "dtype": "string",
82
+ "_type": "Value"
83
+ }
84
+ },
85
+ "Qwen2-Audio-7B-Instruct": {
86
+ "answer": {
87
+ "dtype": "string",
88
+ "_type": "Value"
89
+ },
90
+ "model_prediction": {
91
+ "dtype": "string",
92
+ "_type": "Value"
93
+ },
94
+ "task_type": {
95
+ "dtype": "string",
96
+ "_type": "Value"
97
+ },
98
+ "text": {
99
+ "dtype": "string",
100
+ "_type": "Value"
101
+ }
102
+ },
103
+ "whisper_large_v3_with_llama_3_8b_instruct": {
104
+ "answer": {
105
+ "dtype": "string",
106
+ "_type": "Value"
107
+ },
108
+ "model_prediction": {
109
+ "dtype": "string",
110
+ "_type": "Value"
111
+ },
112
+ "task_type": {
113
+ "dtype": "string",
114
+ "_type": "Value"
115
+ },
116
+ "text": {
117
+ "dtype": "string",
118
+ "_type": "Value"
119
+ }
120
+ },
121
+ "qwen_audio_chat": {
122
+ "answer": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "model_prediction": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "task_type": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "text": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ }
138
+ }
139
+ },
140
+ "homepage": "",
141
+ "license": ""
142
+ }
examples/AQA/WavCaps-QA-Test/sample_0.wav ADDED
Binary file (320 kB). View file
 
examples/AQA/WavCaps-QA-Test/sample_1.wav ADDED
Binary file (320 kB). View file
 
examples/AQA/WavCaps-QA-Test/sample_2.wav ADDED
Binary file (320 kB). View file
 
examples/AQA/WavCaps-QA-Test/state.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "f88396310248e252",
8
+ "_format_columns": [
9
+ "context",
10
+ "instruction",
11
+ "answer",
12
+ "other_attributes",
13
+ "salmonn_7b",
14
+ "wavllm_fairseq",
15
+ "Qwen2-Audio-7B-Instruct",
16
+ "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "qwen_audio_chat"
18
+ ],
19
+ "_format_kwargs": {},
20
+ "_format_type": null,
21
+ "_output_all_columns": false,
22
+ "_split": null
23
+ }