diff --git "a/bbox_json/eval/LUMDdata_box/HallusionBench_only_bbox.json" "b/bbox_json/eval/LUMDdata_box/HallusionBench_only_bbox.json" new file mode 100644--- /dev/null +++ "b/bbox_json/eval/LUMDdata_box/HallusionBench_only_bbox.json" @@ -0,0 +1 @@ +[{"id": "VD_illusion_1_0_0_0", "boxes": [[405, 33, 638, 273], [156, 31, 389, 273], [17, -2, 1201, 732], [32, 27, 750, 698], [978, 242, 1033, 299], [909, 271, 965, 327], [1048, 270, 1103, 326], [28, 252, 265, 491], [338, 308, 454, 429], [530, 251, 761, 490], [882, 244, 1128, 493], [947, 309, 1064, 431], [881, 340, 937, 398], [1076, 340, 1132, 397], [1047, 409, 1103, 467], [909, 409, 964, 467], [978, 438, 1035, 495], [155, 464, 390, 706], [405, 464, 637, 706]], "scores": [0.45751699805259705, 0.4612090587615967, 0.3025868237018585, 0.46012383699417114, 0.28301718831062317, 0.28952541947364807, 0.29497599601745605, 0.4553694427013397, 0.5068652033805847, 0.4286501109600067, 0.37997108697891235, 0.36993423104286194, 0.2736978232860565, 0.30923596024513245, 0.2809922397136688, 0.2898586392402649, 0.2762254774570465, 0.4606761038303375, 0.44789043068885803], "labels": ["circle", "circle", "illustration", "circle", "dot", "dot", "dot", "circle", "circle", "circle", "circle", "circle", "dot", "circle", "dot", "dot", "dot", "circle", "circle"]}, {"id": "VD_illusion_2_30_1_0", "boxes": [[68, 111, 509, 437], [972, 333, 980, 569], [64, 423, 1136, 437], [976, 333, 1251, 569], [514, 429, 971, 565], [978, 417, 1243, 491], [69, 432, 508, 567], [507, 495, 979, 566], [971, 497, 1242, 572], [55, 557, 1099, 573], [68, 567, 511, 799]], "scores": [0.2527632713317871, 0.21127483248710632, 0.26238706707954407, 0.21297460794448853, 0.22513221204280853, 0.22889868915081024, 0.3314000368118286, 0.20102417469024658, 0.25016671419143677, 0.21444670855998993, 0.2906486690044403], "labels": ["square", "line", "line", "rectangle", "rectangle", "rectangle", "rectangle", "line", "rectangle", "line", "rectangle"]}, {"id": "VD_ocr_2_6_1_1", "boxes": [[26, 193, 144, 286], [60, 209, 389, 501], [61, 217, 303, 502]], "scores": [0.29444819688796997, 0.5127168297767639, 0.2009112685918808], "labels": ["flavor", "cake", "icing"]}, {"id": "VD_video_1_1_0_3", "boxes": [[140, 51, 388, 474], [20, 8, 1906, 484]], "scores": [0.26059427857398987, 0.4401450753211975], "labels": ["cartoon character", "cartoon"]}, {"id": "VD_video_1_18_0_1", "boxes": [[2326, 132, 2613, 495], [1173, 121, 1654, 511], [600, 7, 889, 511], [613, 1, 907, 510], [3209, 220, 3506, 499], [138, 228, 702, 512], [1305, 283, 1656, 510], [2113, 331, 2407, 511], [3095, 387, 3446, 512], [3095, 383, 3462, 511], [0, 444, 116, 513], [206, 390, 691, 511], [1076, 426, 1319, 511], [3081, 477, 3226, 513]], "scores": [0.42086729407310486, 0.2226148098707199, 0.2886866331100464, 0.268052339553833, 0.4260094463825226, 0.2396928071975708, 0.41455385088920593, 0.3497925102710724, 0.2368031144142151, 0.35739317536354065, 0.23461446166038513, 0.3686950206756592, 0.3241584897041321, 0.2004246711730957], "labels": ["uniform", "man", "uniform", "man", "uniform", "man", "uniform", "uniform", "uniform", "baseball uniform", "uniform", "uniform", "uniform", "baseball uniform"]}, {"id": "VS_chart_2_9_2_2", "boxes": [[497, 19, 509, 49], [3, 1, 727, 456], [0, 1, 724, 455], [1, 104, 723, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.21998198330402374, 0.37090548872947693, 0.2869301736354828, 0.24609456956386566, 0.21948817372322083, 0.444021075963974, 0.20504862070083618], "labels": ["number", "graph", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_1_5_1_0", "boxes": [[989, 574, 1007, 601], [989, 753, 1007, 780], [989, 798, 1008, 825]], "scores": [0.202177032828331, 0.21557970345020294, 0.20939892530441284], "labels": ["number", "number", "number"]}, {"id": "VS_map_1_4_1_2", "boxes": [[4, 1, 898, 678]], "scores": [0.8334432244300842], "labels": ["map"]}, {"id": "VD_illusion_1_0_0_1", "boxes": [[405, 33, 638, 273], [156, 31, 389, 273], [17, -2, 1201, 732], [32, 27, 750, 698], [978, 242, 1033, 299], [909, 271, 965, 327], [1048, 270, 1103, 326], [28, 252, 265, 491], [338, 308, 454, 429], [530, 251, 761, 490], [882, 244, 1128, 493], [947, 309, 1064, 431], [881, 340, 937, 398], [1076, 340, 1132, 397], [1047, 409, 1103, 467], [909, 409, 964, 467], [978, 438, 1035, 495], [155, 464, 390, 706], [405, 464, 637, 706]], "scores": [0.45751699805259705, 0.4612090587615967, 0.3025868237018585, 0.46012383699417114, 0.28301718831062317, 0.28952541947364807, 0.29497599601745605, 0.4553694427013397, 0.5068652033805847, 0.4286501109600067, 0.37997108697891235, 0.36993423104286194, 0.2736978232860565, 0.30923596024513245, 0.2809922397136688, 0.2898586392402649, 0.2762254774570465, 0.4606761038303375, 0.44789043068885803], "labels": ["circle", "circle", "illustration", "circle", "dot", "dot", "dot", "circle", "circle", "circle", "circle", "circle", "dot", "circle", "dot", "dot", "dot", "circle", "circle"]}, {"id": "VD_illusion_2_30_1_1", "boxes": [[68, 111, 509, 437], [972, 333, 980, 569], [64, 423, 1136, 437], [976, 333, 1251, 569], [514, 429, 971, 565], [978, 417, 1243, 491], [69, 432, 508, 567], [507, 495, 979, 566], [971, 497, 1242, 572], [55, 557, 1099, 573], [68, 567, 511, 799]], "scores": [0.2527632713317871, 0.21127483248710632, 0.26238706707954407, 0.21297460794448853, 0.22513221204280853, 0.22889868915081024, 0.3314000368118286, 0.20102417469024658, 0.25016671419143677, 0.21444670855998993, 0.2906486690044403], "labels": ["square", "line", "line", "rectangle", "rectangle", "rectangle", "rectangle", "line", "rectangle", "line", "rectangle"]}, {"id": "VD_ocr_1_7_0_0", "boxes": [[45, 0, 194, 242], [0, 257, 235, 300]], "scores": [0.767182469367981, 0.4300840198993683], "labels": ["symbol", "calligraphy"]}, {"id": "VD_video_2_1_1_0", "boxes": [[13, 7, 1897, 475], [1581, 50, 1827, 468]], "scores": [0.40279361605644226, 0.27177685499191284], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_1_18_0_2", "boxes": [[2326, 132, 2613, 495], [1173, 121, 1654, 511], [600, 7, 889, 511], [613, 1, 907, 510], [3209, 220, 3506, 499], [138, 228, 702, 512], [1305, 283, 1656, 510], [2113, 331, 2407, 511], [3095, 387, 3446, 512], [3095, 383, 3462, 511], [0, 444, 116, 513], [206, 390, 691, 511], [1076, 426, 1319, 511], [3081, 477, 3226, 513]], "scores": [0.42086729407310486, 0.2226148098707199, 0.2886866331100464, 0.268052339553833, 0.4260094463825226, 0.2396928071975708, 0.41455385088920593, 0.3497925102710724, 0.2368031144142151, 0.35739317536354065, 0.23461446166038513, 0.3686950206756592, 0.3241584897041321, 0.2004246711730957], "labels": ["uniform", "man", "uniform", "man", "uniform", "man", "uniform", "uniform", "uniform", "baseball uniform", "uniform", "uniform", "uniform", "baseball uniform"]}, {"id": "VS_chart_2_9_2_3", "boxes": [[497, 19, 509, 49], [3, 1, 727, 456], [0, 1, 724, 455], [1, 104, 723, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.21998198330402374, 0.37090548872947693, 0.2869301736354828, 0.24609456956386566, 0.21948817372322083, 0.444021075963974, 0.20504862070083618], "labels": ["number", "graph", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_1_5_1_1", "boxes": [[989, 574, 1007, 601], [989, 753, 1007, 780], [989, 798, 1008, 825]], "scores": [0.202177032828331, 0.21557970345020294, 0.20939892530441284], "labels": ["number", "number", "number"]}, {"id": "VS_map_1_4_1_3", "boxes": [[4, 1, 898, 678]], "scores": [0.8334432244300842], "labels": ["map"]}, {"id": "VD_illusion_1_0_0_2", "boxes": [[405, 33, 638, 273], [156, 31, 389, 273], [17, -2, 1201, 732], [32, 27, 750, 698], [978, 242, 1033, 299], [909, 271, 965, 327], [1048, 270, 1103, 326], [28, 252, 265, 491], [338, 308, 454, 429], [530, 251, 761, 490], [882, 244, 1128, 493], [947, 309, 1064, 431], [881, 340, 937, 398], [1076, 340, 1132, 397], [1047, 409, 1103, 467], [909, 409, 964, 467], [978, 438, 1035, 495], [155, 464, 390, 706], [405, 464, 637, 706]], "scores": [0.45751699805259705, 0.4612090587615967, 0.3025868237018585, 0.46012383699417114, 0.28301718831062317, 0.28952541947364807, 0.29497599601745605, 0.4553694427013397, 0.5068652033805847, 0.4286501109600067, 0.37997108697891235, 0.36993423104286194, 0.2736978232860565, 0.30923596024513245, 0.2809922397136688, 0.2898586392402649, 0.2762254774570465, 0.4606761038303375, 0.44789043068885803], "labels": ["circle", "circle", "illustration", "circle", "dot", "dot", "dot", "circle", "circle", "circle", "circle", "circle", "dot", "circle", "dot", "dot", "dot", "circle", "circle"]}, {"id": "VD_math_1_0_0_0", "boxes": [[25, 0, 1322, 734], [31, 436, 1320, 457]], "scores": [0.2289142906665802, 0.21505285799503326], "labels": ["angle", "line"]}, {"id": "VD_ocr_1_7_0_1", "boxes": [[45, 0, 194, 242], [0, 257, 235, 300]], "scores": [0.767182469367981, 0.4300840198993683], "labels": ["symbol", "calligraphy"]}, {"id": "VD_video_2_1_1_1", "boxes": [[13, 7, 1897, 475], [1581, 50, 1827, 468]], "scores": [0.40279361605644226, 0.27177685499191284], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_1_18_0_3", "boxes": [[2326, 132, 2613, 495], [1173, 121, 1654, 511], [600, 7, 889, 511], [613, 1, 907, 510], [3209, 220, 3506, 499], [138, 228, 702, 512], [1305, 283, 1656, 510], [2113, 331, 2407, 511], [3095, 387, 3446, 512], [3095, 383, 3462, 511], [0, 444, 116, 513], [206, 390, 691, 511], [1076, 426, 1319, 511], [3081, 477, 3226, 513]], "scores": [0.42086729407310486, 0.2226148098707199, 0.2886866331100464, 0.268052339553833, 0.4260094463825226, 0.2396928071975708, 0.41455385088920593, 0.3497925102710724, 0.2368031144142151, 0.35739317536354065, 0.23461446166038513, 0.3686950206756592, 0.3241584897041321, 0.2004246711730957], "labels": ["uniform", "man", "uniform", "man", "uniform", "man", "uniform", "uniform", "uniform", "baseball uniform", "uniform", "uniform", "uniform", "baseball uniform"]}, {"id": "VS_chart_0_10_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_5_1_2", "boxes": [[989, 574, 1007, 601], [989, 753, 1007, 780], [989, 798, 1008, 825]], "scores": [0.202177032828331, 0.21557970345020294, 0.20939892530441284], "labels": ["number", "number", "number"]}, {"id": "VS_map_2_4_2_0", "boxes": [[4, 1, 899, 678]], "scores": [0.8396804928779602], "labels": ["map"]}, {"id": "VD_illusion_2_0_1_0", "boxes": [[814, 129, 919, 235], [8, -3, 1194, 738], [975, 182, 1079, 286], [143, 182, 274, 316], [283, 183, 413, 315], [675, 183, 780, 287], [74, 181, 479, 556], [71, 304, 204, 437], [352, 305, 482, 436], [749, 252, 986, 485], [244, 335, 309, 403], [608, 323, 711, 428], [1016, 323, 1122, 429], [85, 136, 1124, 597], [613, 131, 1123, 598], [141, 423, 272, 558], [283, 423, 412, 557], [656, 466, 762, 573], [974, 466, 1079, 573], [821, 498, 927, 604]], "scores": [0.3856346905231476, 0.30852851271629333, 0.41734981536865234, 0.40262508392333984, 0.4161655902862549, 0.4067413806915283, 0.3571634590625763, 0.4128502309322357, 0.40788179636001587, 0.4593852460384369, 0.4414055645465851, 0.38463476300239563, 0.42180541157722473, 0.2209423929452896, 0.384475439786911, 0.3955208361148834, 0.37920495867729187, 0.4018233120441437, 0.4053558111190796, 0.3864176273345947], "labels": ["circle", "illustration", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_1_0_0_1", "boxes": [[25, 0, 1322, 734], [31, 436, 1320, 457]], "scores": [0.2289142906665802, 0.21505285799503326], "labels": ["angle", "line"]}, {"id": "VD_ocr_2_7_1_0", "boxes": [[57, 3, 209, 253], [5, 272, 219, 316]], "scores": [0.6946797966957092, 0.41633591055870056], "labels": ["design", "calligraphy"]}, {"id": "VD_video_2_1_1_2", "boxes": [[13, 7, 1897, 475], [1581, 50, 1827, 468]], "scores": [0.40279361605644226, 0.27177685499191284], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_1_0", "boxes": [[1405, 114, 1713, 482], [2027, 113, 2513, 506], [3296, 3, 3605, 511], [509, 208, 796, 490], [3293, 4, 3596, 504], [379, 217, 789, 502], [2868, 224, 3406, 508], [1204, 322, 1517, 506], [402, 383, 743, 505], [2701, 437, 2815, 506], [2697, 296, 2914, 506], [1981, 416, 2207, 506], [2263, 290, 2554, 506], [2889, 391, 3366, 507]], "scores": [0.3910696506500244, 0.22520208358764648, 0.22028246521949768, 0.3908746540546417, 0.2870635986328125, 0.2272995412349701, 0.22384126484394073, 0.3357642889022827, 0.37701642513275146, 0.23851566016674042, 0.2072867900133133, 0.3213064968585968, 0.46192288398742676, 0.30488160252571106], "labels": ["uniform", "man", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_0_10_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_5_1_3", "boxes": [[989, 574, 1007, 601], [989, 753, 1007, 780], [989, 798, 1008, 825]], "scores": [0.202177032828331, 0.21557970345020294, 0.20939892530441284], "labels": ["number", "number", "number"]}, {"id": "VS_map_2_4_2_1", "boxes": [[4, 1, 899, 678]], "scores": [0.8396804928779602], "labels": ["map"]}, {"id": "VD_illusion_2_0_1_1", "boxes": [[814, 129, 919, 235], [8, -3, 1194, 738], [975, 182, 1079, 286], [143, 182, 274, 316], [283, 183, 413, 315], [675, 183, 780, 287], [74, 181, 479, 556], [71, 304, 204, 437], [352, 305, 482, 436], [749, 252, 986, 485], [244, 335, 309, 403], [608, 323, 711, 428], [1016, 323, 1122, 429], [85, 136, 1124, 597], [613, 131, 1123, 598], [141, 423, 272, 558], [283, 423, 412, 557], [656, 466, 762, 573], [974, 466, 1079, 573], [821, 498, 927, 604]], "scores": [0.3856346905231476, 0.30852851271629333, 0.41734981536865234, 0.40262508392333984, 0.4161655902862549, 0.4067413806915283, 0.3571634590625763, 0.4128502309322357, 0.40788179636001587, 0.4593852460384369, 0.4414055645465851, 0.38463476300239563, 0.42180541157722473, 0.2209423929452896, 0.384475439786911, 0.3955208361148834, 0.37920495867729187, 0.4018233120441437, 0.4053558111190796, 0.3864176273345947], "labels": ["circle", "illustration", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_1_0_0_2", "boxes": [[25, 0, 1322, 734], [31, 436, 1320, 457]], "scores": [0.2289142906665802, 0.21505285799503326], "labels": ["angle", "line"]}, {"id": "VD_ocr_2_7_1_1", "boxes": [[57, 3, 209, 253], [5, 272, 219, 316]], "scores": [0.6946797966957092, 0.41633591055870056], "labels": ["design", "calligraphy"]}, {"id": "VD_video_2_1_1_3", "boxes": [[13, 7, 1897, 475], [1581, 50, 1827, 468]], "scores": [0.40279361605644226, 0.27177685499191284], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_1_1", "boxes": [[1405, 114, 1713, 482], [2027, 113, 2513, 506], [3296, 3, 3605, 511], [509, 208, 796, 490], [3293, 4, 3596, 504], [379, 217, 789, 502], [2868, 224, 3406, 508], [1204, 322, 1517, 506], [402, 383, 743, 505], [2701, 437, 2815, 506], [2697, 296, 2914, 506], [1981, 416, 2207, 506], [2263, 290, 2554, 506], [2889, 391, 3366, 507]], "scores": [0.3910696506500244, 0.22520208358764648, 0.22028246521949768, 0.3908746540546417, 0.2870635986328125, 0.2272995412349701, 0.22384126484394073, 0.3357642889022827, 0.37701642513275146, 0.23851566016674042, 0.2072867900133133, 0.3213064968585968, 0.46192288398742676, 0.30488160252571106], "labels": ["uniform", "man", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_0_10_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_5_2_0", "boxes": [[975, 510, 993, 537], [975, 646, 993, 673], [974, 780, 993, 807]], "scores": [0.20115964114665985, 0.20461080968379974, 0.21302077174186707], "labels": ["number", "number", "number"]}, {"id": "VS_map_2_4_2_2", "boxes": [[4, 1, 899, 678]], "scores": [0.8396804928779602], "labels": ["map"]}, {"id": "VD_illusion_2_0_1_2", "boxes": [[814, 129, 919, 235], [8, -3, 1194, 738], [975, 182, 1079, 286], [143, 182, 274, 316], [283, 183, 413, 315], [675, 183, 780, 287], [74, 181, 479, 556], [71, 304, 204, 437], [352, 305, 482, 436], [749, 252, 986, 485], [244, 335, 309, 403], [608, 323, 711, 428], [1016, 323, 1122, 429], [85, 136, 1124, 597], [613, 131, 1123, 598], [141, 423, 272, 558], [283, 423, 412, 557], [656, 466, 762, 573], [974, 466, 1079, 573], [821, 498, 927, 604]], "scores": [0.3856346905231476, 0.30852851271629333, 0.41734981536865234, 0.40262508392333984, 0.4161655902862549, 0.4067413806915283, 0.3571634590625763, 0.4128502309322357, 0.40788179636001587, 0.4593852460384369, 0.4414055645465851, 0.38463476300239563, 0.42180541157722473, 0.2209423929452896, 0.384475439786911, 0.3955208361148834, 0.37920495867729187, 0.4018233120441437, 0.4053558111190796, 0.3864176273345947], "labels": ["circle", "illustration", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_2_0_1_0", "boxes": [[9, 20, 1286, 760], [10, 461, 1282, 486]], "scores": [0.414570689201355, 0.21694150567054749], "labels": ["angle", "line"]}, {"id": "VD_ocr_1_8_0_0", "boxes": [[2, 35, 483, 401], [35, 185, 431, 343]], "scores": [0.37494370341300964, 0.7246512174606323], "labels": ["package", "beef"]}, {"id": "VD_video_2_1_2_0", "boxes": [[1, 2, 503, 126], [38, 15, 103, 124]], "scores": [0.30356717109680176, 0.24037852883338928], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_1_2", "boxes": [[1405, 114, 1713, 482], [2027, 113, 2513, 506], [3296, 3, 3605, 511], [509, 208, 796, 490], [3293, 4, 3596, 504], [379, 217, 789, 502], [2868, 224, 3406, 508], [1204, 322, 1517, 506], [402, 383, 743, 505], [2701, 437, 2815, 506], [2697, 296, 2914, 506], [1981, 416, 2207, 506], [2263, 290, 2554, 506], [2889, 391, 3366, 507]], "scores": [0.3910696506500244, 0.22520208358764648, 0.22028246521949768, 0.3908746540546417, 0.2870635986328125, 0.2272995412349701, 0.22384126484394073, 0.3357642889022827, 0.37701642513275146, 0.23851566016674042, 0.2072867900133133, 0.3213064968585968, 0.46192288398742676, 0.30488160252571106], "labels": ["uniform", "man", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_0_10_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_5_2_1", "boxes": [[975, 510, 993, 537], [975, 646, 993, 673], [974, 780, 993, 807]], "scores": [0.20115964114665985, 0.20461080968379974, 0.21302077174186707], "labels": ["number", "number", "number"]}, {"id": "VS_map_2_4_2_3", "boxes": [[4, 1, 899, 678]], "scores": [0.8396804928779602], "labels": ["map"]}, {"id": "VD_illusion_1_1_0_0", "boxes": [[346, 14, 592, 245], [21, 13, 271, 245], [667, 14, 915, 244], [1387, 126, 1445, 215], [1482, 126, 1539, 215], [1582, 126, 1640, 216], [1681, 126, 1741, 215], [1291, 119, 1744, 585], [2, -2, 900, 759], [21, -6, 1776, 785], [1303, 240, 1362, 327], [1371, 224, 1672, 470], [1681, 241, 1741, 328], [20, 270, 271, 496], [320, 260, 617, 507], [670, 271, 916, 494], [1304, 354, 1362, 443], [1681, 354, 1742, 443], [1303, 487, 1361, 575], [1387, 487, 1445, 576], [1482, 487, 1539, 575], [1576, 486, 1634, 575], [1681, 486, 1742, 576], [19, 523, 271, 750], [669, 524, 914, 749], [345, 522, 591, 750]], "scores": [0.4563072919845581, 0.4303613603115082, 0.44537273049354553, 0.20765745639801025, 0.20633481442928314, 0.2155313640832901, 0.2603597939014435, 0.3089824616909027, 0.2753623425960541, 0.20913510024547577, 0.2614555060863495, 0.47150468826293945, 0.31622329354286194, 0.4767369329929352, 0.5399768352508545, 0.486549973487854, 0.2804463505744934, 0.312422513961792, 0.23770219087600708, 0.25277218222618103, 0.2452496737241745, 0.24455134570598602, 0.3046567738056183, 0.4494137167930603, 0.44278499484062195, 0.4564187228679657], "labels": ["square", "square", "square", "square", "square", "square", "square", "rectangle", "square", "rectangle", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_2_0_1_1", "boxes": [[9, 20, 1286, 760], [10, 461, 1282, 486]], "scores": [0.414570689201355, 0.21694150567054749], "labels": ["angle", "line"]}, {"id": "VD_ocr_1_8_0_1", "boxes": [[2, 35, 483, 401], [35, 185, 431, 343]], "scores": [0.37494370341300964, 0.7246512174606323], "labels": ["package", "beef"]}, {"id": "VD_video_2_1_2_1", "boxes": [[1, 2, 503, 126], [38, 15, 103, 124]], "scores": [0.30356717109680176, 0.24037852883338928], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_1_3", "boxes": [[1405, 114, 1713, 482], [2027, 113, 2513, 506], [3296, 3, 3605, 511], [509, 208, 796, 490], [3293, 4, 3596, 504], [379, 217, 789, 502], [2868, 224, 3406, 508], [1204, 322, 1517, 506], [402, 383, 743, 505], [2701, 437, 2815, 506], [2697, 296, 2914, 506], [1981, 416, 2207, 506], [2263, 290, 2554, 506], [2889, 391, 3366, 507]], "scores": [0.3910696506500244, 0.22520208358764648, 0.22028246521949768, 0.3908746540546417, 0.2870635986328125, 0.2272995412349701, 0.22384126484394073, 0.3357642889022827, 0.37701642513275146, 0.23851566016674042, 0.2072867900133133, 0.3213064968585968, 0.46192288398742676, 0.30488160252571106], "labels": ["uniform", "man", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "man", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_1_10_1_0", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20248757302761078, 0.362436980009079, 0.44761165976524353, 0.4271913468837738, 0.24556989967823029, 0.29389557242393494, 0.27326029539108276, 0.20904549956321716, 0.37099993228912354], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_2_2", "boxes": [[975, 510, 993, 537], [975, 646, 993, 673], [974, 780, 993, 807]], "scores": [0.20115964114665985, 0.20461080968379974, 0.21302077174186707], "labels": ["number", "number", "number"]}, {"id": "VS_map_0_5_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_1_0_1", "boxes": [[346, 14, 592, 245], [21, 13, 271, 245], [667, 14, 915, 244], [1387, 126, 1445, 215], [1482, 126, 1539, 215], [1582, 126, 1640, 216], [1681, 126, 1741, 215], [1291, 119, 1744, 585], [2, -2, 900, 759], [21, -6, 1776, 785], [1303, 240, 1362, 327], [1371, 224, 1672, 470], [1681, 241, 1741, 328], [20, 270, 271, 496], [320, 260, 617, 507], [670, 271, 916, 494], [1304, 354, 1362, 443], [1681, 354, 1742, 443], [1303, 487, 1361, 575], [1387, 487, 1445, 576], [1482, 487, 1539, 575], [1576, 486, 1634, 575], [1681, 486, 1742, 576], [19, 523, 271, 750], [669, 524, 914, 749], [345, 522, 591, 750]], "scores": [0.4563072919845581, 0.4303613603115082, 0.44537273049354553, 0.20765745639801025, 0.20633481442928314, 0.2155313640832901, 0.2603597939014435, 0.3089824616909027, 0.2753623425960541, 0.20913510024547577, 0.2614555060863495, 0.47150468826293945, 0.31622329354286194, 0.4767369329929352, 0.5399768352508545, 0.486549973487854, 0.2804463505744934, 0.312422513961792, 0.23770219087600708, 0.25277218222618103, 0.2452496737241745, 0.24455134570598602, 0.3046567738056183, 0.4494137167930603, 0.44278499484062195, 0.4564187228679657], "labels": ["square", "square", "square", "square", "square", "square", "square", "rectangle", "square", "rectangle", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_2_0_1_2", "boxes": [[9, 20, 1286, 760], [10, 461, 1282, 486]], "scores": [0.414570689201355, 0.21694150567054749], "labels": ["angle", "line"]}, {"id": "VD_ocr_2_8_1_0", "boxes": [[16, 22, 522, 405], [51, 177, 467, 343]], "scores": [0.3845231533050537, 0.6945052742958069], "labels": ["package", "beef"]}, {"id": "VD_video_2_1_2_2", "boxes": [[1, 2, 503, 126], [38, 15, 103, 124]], "scores": [0.30356717109680176, 0.24037852883338928], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_2_0", "boxes": [[2335, 124, 2617, 486], [3017, 119, 3465, 512], [597, 2, 885, 509], [605, 0, 901, 506], [1410, 215, 1699, 490], [3105, 285, 3462, 509], [138, 226, 697, 509], [2111, 325, 2399, 509], [1298, 377, 1665, 505], [0, 439, 118, 510], [193, 384, 687, 507], [2877, 421, 3120, 512]], "scores": [0.4002908170223236, 0.2083888053894043, 0.2590126097202301, 0.2626343071460724, 0.4039895236492157, 0.4618055820465088, 0.22791017591953278, 0.3552185297012329, 0.3804488480091095, 0.23769226670265198, 0.39057764410972595, 0.36095842719078064], "labels": ["uniform", "man", "uniform", "man", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_1_10_1_1", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20248757302761078, 0.362436980009079, 0.44761165976524353, 0.4271913468837738, 0.24556989967823029, 0.29389557242393494, 0.27326029539108276, 0.20904549956321716, 0.37099993228912354], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_2_3", "boxes": [[975, 510, 993, 537], [975, 646, 993, 673], [974, 780, 993, 807]], "scores": [0.20115964114665985, 0.20461080968379974, 0.21302077174186707], "labels": ["number", "number", "number"]}, {"id": "VS_map_0_5_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_1_0_2", "boxes": [[346, 14, 592, 245], [21, 13, 271, 245], [667, 14, 915, 244], [1387, 126, 1445, 215], [1482, 126, 1539, 215], [1582, 126, 1640, 216], [1681, 126, 1741, 215], [1291, 119, 1744, 585], [2, -2, 900, 759], [21, -6, 1776, 785], [1303, 240, 1362, 327], [1371, 224, 1672, 470], [1681, 241, 1741, 328], [20, 270, 271, 496], [320, 260, 617, 507], [670, 271, 916, 494], [1304, 354, 1362, 443], [1681, 354, 1742, 443], [1303, 487, 1361, 575], [1387, 487, 1445, 576], [1482, 487, 1539, 575], [1576, 486, 1634, 575], [1681, 486, 1742, 576], [19, 523, 271, 750], [669, 524, 914, 749], [345, 522, 591, 750]], "scores": [0.4563072919845581, 0.4303613603115082, 0.44537273049354553, 0.20765745639801025, 0.20633481442928314, 0.2155313640832901, 0.2603597939014435, 0.3089824616909027, 0.2753623425960541, 0.20913510024547577, 0.2614555060863495, 0.47150468826293945, 0.31622329354286194, 0.4767369329929352, 0.5399768352508545, 0.486549973487854, 0.2804463505744934, 0.312422513961792, 0.23770219087600708, 0.25277218222618103, 0.2452496737241745, 0.24455134570598602, 0.3046567738056183, 0.4494137167930603, 0.44278499484062195, 0.4564187228679657], "labels": ["square", "square", "square", "square", "square", "square", "square", "rectangle", "square", "rectangle", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_1_1_0_0", "boxes": [[227, 114, 243, 136], [45, 10, 483, 311]], "scores": [0.228157639503479, 0.3274073898792267], "labels": ["angle", "angle"]}, {"id": "VD_ocr_2_8_1_1", "boxes": [[16, 22, 522, 405], [51, 177, 467, 343]], "scores": [0.3845231533050537, 0.6945052742958069], "labels": ["package", "beef"]}, {"id": "VD_video_2_1_2_3", "boxes": [[1, 2, 503, 126], [38, 15, 103, 124]], "scores": [0.30356717109680176, 0.24037852883338928], "labels": ["cartoon", "cartoon character"]}, {"id": "VD_video_2_18_2_1", "boxes": [[2335, 124, 2617, 486], [3017, 119, 3465, 512], [597, 2, 885, 509], [605, 0, 901, 506], [1410, 215, 1699, 490], [3105, 285, 3462, 509], [138, 226, 697, 509], [2111, 325, 2399, 509], [1298, 377, 1665, 505], [0, 439, 118, 510], [193, 384, 687, 507], [2877, 421, 3120, 512]], "scores": [0.4002908170223236, 0.2083888053894043, 0.2590126097202301, 0.2626343071460724, 0.4039895236492157, 0.4618055820465088, 0.22791017591953278, 0.3552185297012329, 0.3804488480091095, 0.23769226670265198, 0.39057764410972595, 0.36095842719078064], "labels": ["uniform", "man", "uniform", "man", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_1_10_1_2", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20248757302761078, 0.362436980009079, 0.44761165976524353, 0.4271913468837738, 0.24556989967823029, 0.29389557242393494, 0.27326029539108276, 0.20904549956321716, 0.37099993228912354], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_3_0", "boxes": [[966, 299, 986, 324], [966, 377, 986, 403], [957, 456, 976, 482], [496, 535, 515, 561], [966, 535, 986, 561], [966, 613, 985, 640], [966, 770, 986, 797]], "scores": [0.2398996651172638, 0.2337634265422821, 0.23518456518650055, 0.20370903611183167, 0.26720917224884033, 0.2997090816497803, 0.3071097135543823], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_5_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_1_1_0", "boxes": [[17, 15, 237, 217], [302, 15, 518, 218], [584, 15, 799, 218], [3, 4, 784, 670], [1143, 215, 1194, 293], [1215, 214, 1266, 292], [1289, 214, 1340, 291], [17, 238, 236, 439], [279, 230, 541, 449], [584, 239, 796, 438], [1142, 314, 1195, 393], [1215, 315, 1266, 393], [1288, 315, 1340, 393], [1136, 213, 1345, 496], [1142, 417, 1194, 496], [1215, 415, 1266, 495], [1288, 415, 1340, 495], [585, 463, 797, 662], [17, 463, 235, 664], [302, 462, 517, 663]], "scores": [0.4330611824989319, 0.4455455541610718, 0.43375512957572937, 0.26699361205101013, 0.25973761081695557, 0.2660048305988312, 0.30232661962509155, 0.4690245985984802, 0.5367468595504761, 0.4676838219165802, 0.32190242409706116, 0.44287702441215515, 0.35829585790634155, 0.2527599036693573, 0.29500922560691833, 0.30823639035224915, 0.34139934182167053, 0.4395630955696106, 0.443494975566864, 0.4449614882469177], "labels": ["square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_1_1_0_1", "boxes": [[227, 114, 243, 136], [45, 10, 483, 311]], "scores": [0.228157639503479, 0.3274073898792267], "labels": ["angle", "angle"]}, {"id": "VD_ocr_1_9_0_0", "boxes": [[17, 0, 370, 126], [172, 176, 224, 219], [0, 204, 366, 494], [111, 189, 177, 254], [111, 176, 240, 263], [6, 172, 364, 434], [99, 241, 259, 403], [158, 359, 224, 411], [158, 406, 213, 440]], "scores": [0.2687782049179077, 0.28380098938941956, 0.5150157809257507, 0.3300532102584839, 0.35357701778411865, 0.8402150869369507, 0.21557283401489258, 0.3334580957889557, 0.31031185388565063], "labels": ["plate", "berry", "plate", "berry", "berry", "cheesecake", "cream", "berry", "berry"]}, {"id": "VD_video_1_2_0_0", "boxes": [[409, 9, 420, 23], [228, 9, 251, 62], [287, 25, 299, 39], [94, 35, 124, 93], [5, 32, 66, 102], [320, 43, 360, 112], [35, 71, 44, 81], [96, 69, 104, 81], [158, 45, 221, 114], [283, 39, 326, 109], [448, 68, 456, 81], [467, 69, 479, 83], [2, 7, 509, 130], [261, 57, 377, 128], [111, 83, 124, 90], [141, 69, 200, 128], [146, 71, 221, 125], [262, 89, 300, 127], [421, 75, 447, 129], [309, 91, 322, 103], [343, 101, 351, 111], [84, 80, 119, 128], [207, 104, 221, 117], [339, 101, 350, 111], [439, 63, 476, 128], [142, 107, 153, 124], [2, 38, 246, 128], [409, 121, 418, 127]], "scores": [0.28542420268058777, 0.28487035632133484, 0.36523717641830444, 0.4311409592628479, 0.4919242858886719, 0.396202027797699, 0.6826230883598328, 0.2101944386959076, 0.40151289105415344, 0.38739120960235596, 0.3113582730293274, 0.39765074849128723, 0.263857364654541, 0.20516516268253326, 0.21645647287368774, 0.42017483711242676, 0.3165283203125, 0.30698636174201965, 0.4217240810394287, 0.29491209983825684, 0.2094983011484146, 0.3914622962474823, 0.24434015154838562, 0.39745667576789856, 0.43129435181617737, 0.2708623707294464, 0.30795127153396606, 0.34617385268211365], "labels": ["basketball", "basketball player", "basketball", "basketball player", "player", "basketball player", "basketball", "athletic", "player", "player", "basketball", "basketball", "image", "playingfield", "athletic", "basketball player", "player", "basketball player", "basketball player", "athletic", "basketball", "player", "athletic", "basketball", "player", "athletic", "playingfield", "basketball"]}, {"id": "VD_video_2_18_2_2", "boxes": [[2335, 124, 2617, 486], [3017, 119, 3465, 512], [597, 2, 885, 509], [605, 0, 901, 506], [1410, 215, 1699, 490], [3105, 285, 3462, 509], [138, 226, 697, 509], [2111, 325, 2399, 509], [1298, 377, 1665, 505], [0, 439, 118, 510], [193, 384, 687, 507], [2877, 421, 3120, 512]], "scores": [0.4002908170223236, 0.2083888053894043, 0.2590126097202301, 0.2626343071460724, 0.4039895236492157, 0.4618055820465088, 0.22791017591953278, 0.3552185297012329, 0.3804488480091095, 0.23769226670265198, 0.39057764410972595, 0.36095842719078064], "labels": ["uniform", "man", "uniform", "man", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_1_10_1_3", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20248757302761078, 0.362436980009079, 0.44761165976524353, 0.4271913468837738, 0.24556989967823029, 0.29389557242393494, 0.27326029539108276, 0.20904549956321716, 0.37099993228912354], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_3_1", "boxes": [[966, 299, 986, 324], [966, 377, 986, 403], [957, 456, 976, 482], [496, 535, 515, 561], [966, 535, 986, 561], [966, 613, 985, 640], [966, 770, 986, 797]], "scores": [0.2398996651172638, 0.2337634265422821, 0.23518456518650055, 0.20370903611183167, 0.26720917224884033, 0.2997090816497803, 0.3071097135543823], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_5_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_1_1_1", "boxes": [[17, 15, 237, 217], [302, 15, 518, 218], [584, 15, 799, 218], [3, 4, 784, 670], [1143, 215, 1194, 293], [1215, 214, 1266, 292], [1289, 214, 1340, 291], [17, 238, 236, 439], [279, 230, 541, 449], [584, 239, 796, 438], [1142, 314, 1195, 393], [1215, 315, 1266, 393], [1288, 315, 1340, 393], [1136, 213, 1345, 496], [1142, 417, 1194, 496], [1215, 415, 1266, 495], [1288, 415, 1340, 495], [585, 463, 797, 662], [17, 463, 235, 664], [302, 462, 517, 663]], "scores": [0.4330611824989319, 0.4455455541610718, 0.43375512957572937, 0.26699361205101013, 0.25973761081695557, 0.2660048305988312, 0.30232661962509155, 0.4690245985984802, 0.5367468595504761, 0.4676838219165802, 0.32190242409706116, 0.44287702441215515, 0.35829585790634155, 0.2527599036693573, 0.29500922560691833, 0.30823639035224915, 0.34139934182167053, 0.4395630955696106, 0.443494975566864, 0.4449614882469177], "labels": ["square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_1_1_0_2", "boxes": [[227, 114, 243, 136], [45, 10, 483, 311]], "scores": [0.228157639503479, 0.3274073898792267], "labels": ["angle", "angle"]}, {"id": "VD_ocr_1_9_0_1", "boxes": [[17, 0, 370, 126], [172, 176, 224, 219], [0, 204, 366, 494], [111, 189, 177, 254], [111, 176, 240, 263], [6, 172, 364, 434], [99, 241, 259, 403], [158, 359, 224, 411], [158, 406, 213, 440]], "scores": [0.2687782049179077, 0.28380098938941956, 0.5150157809257507, 0.3300532102584839, 0.35357701778411865, 0.8402150869369507, 0.21557283401489258, 0.3334580957889557, 0.31031185388565063], "labels": ["plate", "berry", "plate", "berry", "berry", "cheesecake", "cream", "berry", "berry"]}, {"id": "VD_video_1_2_0_1", "boxes": [[409, 9, 420, 23], [228, 9, 251, 62], [287, 25, 299, 39], [94, 35, 124, 93], [5, 32, 66, 102], [320, 43, 360, 112], [35, 71, 44, 81], [96, 69, 104, 81], [158, 45, 221, 114], [283, 39, 326, 109], [448, 68, 456, 81], [467, 69, 479, 83], [2, 7, 509, 130], [261, 57, 377, 128], [111, 83, 124, 90], [141, 69, 200, 128], [146, 71, 221, 125], [262, 89, 300, 127], [421, 75, 447, 129], [309, 91, 322, 103], [343, 101, 351, 111], [84, 80, 119, 128], [207, 104, 221, 117], [339, 101, 350, 111], [439, 63, 476, 128], [142, 107, 153, 124], [2, 38, 246, 128], [409, 121, 418, 127]], "scores": [0.28542420268058777, 0.28487035632133484, 0.36523717641830444, 0.4311409592628479, 0.4919242858886719, 0.396202027797699, 0.6826230883598328, 0.2101944386959076, 0.40151289105415344, 0.38739120960235596, 0.3113582730293274, 0.39765074849128723, 0.263857364654541, 0.20516516268253326, 0.21645647287368774, 0.42017483711242676, 0.3165283203125, 0.30698636174201965, 0.4217240810394287, 0.29491209983825684, 0.2094983011484146, 0.3914622962474823, 0.24434015154838562, 0.39745667576789856, 0.43129435181617737, 0.2708623707294464, 0.30795127153396606, 0.34617385268211365], "labels": ["basketball", "basketball player", "basketball", "basketball player", "player", "basketball player", "basketball", "athletic", "player", "player", "basketball", "basketball", "image", "playingfield", "athletic", "basketball player", "player", "basketball player", "basketball player", "athletic", "basketball", "player", "athletic", "basketball", "player", "athletic", "playingfield", "basketball"]}, {"id": "VD_video_2_18_2_3", "boxes": [[2335, 124, 2617, 486], [3017, 119, 3465, 512], [597, 2, 885, 509], [605, 0, 901, 506], [1410, 215, 1699, 490], [3105, 285, 3462, 509], [138, 226, 697, 509], [2111, 325, 2399, 509], [1298, 377, 1665, 505], [0, 439, 118, 510], [193, 384, 687, 507], [2877, 421, 3120, 512]], "scores": [0.4002908170223236, 0.2083888053894043, 0.2590126097202301, 0.2626343071460724, 0.4039895236492157, 0.4618055820465088, 0.22791017591953278, 0.3552185297012329, 0.3804488480091095, 0.23769226670265198, 0.39057764410972595, 0.36095842719078064], "labels": ["uniform", "man", "uniform", "man", "uniform", "uniform", "man", "uniform", "uniform", "uniform", "uniform", "uniform"]}, {"id": "VS_chart_2_10_2_0", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20262987911701202, 0.36390820145606995, 0.4472777247428894, 0.4253298044204712, 0.24382619559764862, 0.29275593161582947, 0.27370691299438477, 0.2088310867547989, 0.37130486965179443], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_3_2", "boxes": [[966, 299, 986, 324], [966, 377, 986, 403], [957, 456, 976, 482], [496, 535, 515, 561], [966, 535, 986, 561], [966, 613, 985, 640], [966, 770, 986, 797]], "scores": [0.2398996651172638, 0.2337634265422821, 0.23518456518650055, 0.20370903611183167, 0.26720917224884033, 0.2997090816497803, 0.3071097135543823], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_5_1_0", "boxes": [[2, -3, 926, 611], [15, 95, 874, 571]], "scores": [0.7825090289115906, 0.20423205196857452], "labels": ["map", "map"]}, {"id": "VD_illusion_2_1_1_2", "boxes": [[17, 15, 237, 217], [302, 15, 518, 218], [584, 15, 799, 218], [3, 4, 784, 670], [1143, 215, 1194, 293], [1215, 214, 1266, 292], [1289, 214, 1340, 291], [17, 238, 236, 439], [279, 230, 541, 449], [584, 239, 796, 438], [1142, 314, 1195, 393], [1215, 315, 1266, 393], [1288, 315, 1340, 393], [1136, 213, 1345, 496], [1142, 417, 1194, 496], [1215, 415, 1266, 495], [1288, 415, 1340, 495], [585, 463, 797, 662], [17, 463, 235, 664], [302, 462, 517, 663]], "scores": [0.4330611824989319, 0.4455455541610718, 0.43375512957572937, 0.26699361205101013, 0.25973761081695557, 0.2660048305988312, 0.30232661962509155, 0.4690245985984802, 0.5367468595504761, 0.4676838219165802, 0.32190242409706116, 0.44287702441215515, 0.35829585790634155, 0.2527599036693573, 0.29500922560691833, 0.30823639035224915, 0.34139934182167053, 0.4395630955696106, 0.443494975566864, 0.4449614882469177], "labels": ["square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_2_1_1_0", "boxes": [[64, 40, 677, 308]], "scores": [0.6838505864143372], "labels": ["triangle"]}, {"id": "VD_ocr_2_9_1_0", "boxes": [[23, 0, 498, 173], [235, 242, 311, 303], [1, 279, 498, 676], [151, 260, 238, 348], [149, 241, 329, 361], [236, 289, 316, 349], [8, 236, 498, 597], [133, 339, 355, 551], [217, 494, 307, 566], [216, 559, 291, 604]], "scores": [0.23231002688407898, 0.2696240544319153, 0.4447494447231293, 0.3346000909805298, 0.2995230257511139, 0.22970399260520935, 0.7660083770751953, 0.22466279566287994, 0.29208919405937195, 0.30653566122055054], "labels": ["plate", "berry", "plate", "blackberry", "berry", "berry", "cheesecake", "cream", "berry", "berry"]}, {"id": "VD_video_1_2_0_2", "boxes": [[409, 9, 420, 23], [228, 9, 251, 62], [287, 25, 299, 39], [94, 35, 124, 93], [5, 32, 66, 102], [320, 43, 360, 112], [35, 71, 44, 81], [96, 69, 104, 81], [158, 45, 221, 114], [283, 39, 326, 109], [448, 68, 456, 81], [467, 69, 479, 83], [2, 7, 509, 130], [261, 57, 377, 128], [111, 83, 124, 90], [141, 69, 200, 128], [146, 71, 221, 125], [262, 89, 300, 127], [421, 75, 447, 129], [309, 91, 322, 103], [343, 101, 351, 111], [84, 80, 119, 128], [207, 104, 221, 117], [339, 101, 350, 111], [439, 63, 476, 128], [142, 107, 153, 124], [2, 38, 246, 128], [409, 121, 418, 127]], "scores": [0.28542420268058777, 0.28487035632133484, 0.36523717641830444, 0.4311409592628479, 0.4919242858886719, 0.396202027797699, 0.6826230883598328, 0.2101944386959076, 0.40151289105415344, 0.38739120960235596, 0.3113582730293274, 0.39765074849128723, 0.263857364654541, 0.20516516268253326, 0.21645647287368774, 0.42017483711242676, 0.3165283203125, 0.30698636174201965, 0.4217240810394287, 0.29491209983825684, 0.2094983011484146, 0.3914622962474823, 0.24434015154838562, 0.39745667576789856, 0.43129435181617737, 0.2708623707294464, 0.30795127153396606, 0.34617385268211365], "labels": ["basketball", "basketball player", "basketball", "basketball player", "player", "basketball player", "basketball", "athletic", "player", "player", "basketball", "basketball", "image", "playingfield", "athletic", "basketball player", "player", "basketball player", "basketball player", "athletic", "basketball", "player", "athletic", "basketball", "player", "athletic", "playingfield", "basketball"]}, {"id": "VD_video_1_19_0_0", "boxes": [[261, 76, 472, 538], [1192, 90, 1538, 543], [3280, 106, 3401, 487], [2190, 87, 2665, 535], [2091, 360, 2804, 538], [2862, 322, 3839, 537]], "scores": [0.21436624228954315, 0.20492792129516602, 0.28767725825309753, 0.21343989670276642, 0.2749791741371155, 0.409064382314682], "labels": ["man", "man", "man", "man", "arena", "arena"]}, {"id": "VS_chart_2_10_2_1", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20262987911701202, 0.36390820145606995, 0.4472777247428894, 0.4253298044204712, 0.24382619559764862, 0.29275593161582947, 0.27370691299438477, 0.2088310867547989, 0.37130486965179443], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_2_5_3_3", "boxes": [[966, 299, 986, 324], [966, 377, 986, 403], [957, 456, 976, 482], [496, 535, 515, 561], [966, 535, 986, 561], [966, 613, 985, 640], [966, 770, 986, 797]], "scores": [0.2398996651172638, 0.2337634265422821, 0.23518456518650055, 0.20370903611183167, 0.26720917224884033, 0.2997090816497803, 0.3071097135543823], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_5_1_1", "boxes": [[2, -3, 926, 611], [15, 95, 874, 571]], "scores": [0.7825090289115906, 0.20423205196857452], "labels": ["map", "map"]}, {"id": "VD_illusion_1_2_0_0", "boxes": [[37, 19, 109, 101], [787, 33, 857, 119], [51, 21, 865, 120], [16, 276, 89, 370], [22, 274, 963, 369], [870, 269, 946, 352]], "scores": [0.21531696617603302, 0.2525463104248047, 0.4610448479652405, 0.21393299102783203, 0.36978569626808167, 0.23460133373737335], "labels": ["point", "arrow", "arrow", "point", "arrow", "point"]}, {"id": "VD_math_2_1_1_1", "boxes": [[64, 40, 677, 308]], "scores": [0.6838505864143372], "labels": ["triangle"]}, {"id": "VD_ocr_2_9_1_1", "boxes": [[23, 0, 498, 173], [235, 242, 311, 303], [1, 279, 498, 676], [151, 260, 238, 348], [149, 241, 329, 361], [236, 289, 316, 349], [8, 236, 498, 597], [133, 339, 355, 551], [217, 494, 307, 566], [216, 559, 291, 604]], "scores": [0.23231002688407898, 0.2696240544319153, 0.4447494447231293, 0.3346000909805298, 0.2995230257511139, 0.22970399260520935, 0.7660083770751953, 0.22466279566287994, 0.29208919405937195, 0.30653566122055054], "labels": ["plate", "berry", "plate", "blackberry", "berry", "berry", "cheesecake", "cream", "berry", "berry"]}, {"id": "VD_video_2_2_1_0", "boxes": [[160, 23, 173, 37], [355, 6, 379, 58], [475, 32, 507, 90], [286, 42, 319, 82], [387, 30, 452, 101], [151, 41, 197, 107], [193, 41, 231, 111], [65, 63, 74, 79], [85, 67, 96, 80], [134, 54, 248, 127], [281, 43, 350, 115], [262, 42, 375, 128], [418, 69, 428, 79], [390, 38, 505, 127], [3, 6, 509, 130], [270, 66, 304, 127], [56, 61, 94, 127], [467, 78, 503, 126], [76, 78, 122, 125], [273, 67, 330, 125], [273, 67, 349, 124], [38, 72, 63, 126], [212, 99, 222, 109], [5, 76, 121, 126], [134, 87, 171, 126], [26, 119, 35, 125]], "scores": [0.4007842242717743, 0.26124730706214905, 0.41473305225372314, 0.2626587152481079, 0.5071172714233398, 0.4089081287384033, 0.3968779742717743, 0.24098904430866241, 0.35128286480903625, 0.2604479491710663, 0.28155073523521423, 0.21293513476848602, 0.6992228031158447, 0.25423482060432434, 0.27064844965934753, 0.23352737724781036, 0.40800169110298157, 0.44306302070617676, 0.20243123173713684, 0.3792933523654938, 0.382467120885849, 0.42971402406692505, 0.22330676019191742, 0.23211327195167542, 0.34798312187194824, 0.2687023878097534], "labels": ["basketball", "basketball player", "basketball player", "player", "player", "player", "basketball player", "basketball", "basketball", "playingfield", "player", "playingfield", "basketball", "playingfield", "image", "player", "basketball player", "player", "playingfield", "basketball player", "player", "basketball player", "basketball", "playingfield", "basketball player", "basketball"]}, {"id": "VD_video_1_19_0_1", "boxes": [[261, 76, 472, 538], [1192, 90, 1538, 543], [3280, 106, 3401, 487], [2190, 87, 2665, 535], [2091, 360, 2804, 538], [2862, 322, 3839, 537]], "scores": [0.21436624228954315, 0.20492792129516602, 0.28767725825309753, 0.21343989670276642, 0.2749791741371155, 0.409064382314682], "labels": ["man", "man", "man", "man", "arena", "arena"]}, {"id": "VS_chart_2_10_2_2", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20262987911701202, 0.36390820145606995, 0.4472777247428894, 0.4253298044204712, 0.24382619559764862, 0.29275593161582947, 0.27370691299438477, 0.2088310867547989, 0.37130486965179443], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_0_6_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_5_1_2", "boxes": [[2, -3, 926, 611], [15, 95, 874, 571]], "scores": [0.7825090289115906, 0.20423205196857452], "labels": ["map", "map"]}, {"id": "VD_illusion_1_2_0_1", "boxes": [[37, 19, 109, 101], [787, 33, 857, 119], [51, 21, 865, 120], [16, 276, 89, 370], [22, 274, 963, 369], [870, 269, 946, 352]], "scores": [0.21531696617603302, 0.2525463104248047, 0.4610448479652405, 0.21393299102783203, 0.36978569626808167, 0.23460133373737335], "labels": ["point", "arrow", "arrow", "point", "arrow", "point"]}, {"id": "VD_math_2_1_1_2", "boxes": [[64, 40, 677, 308]], "scores": [0.6838505864143372], "labels": ["triangle"]}, {"id": "VD_ocr_1_10_0_0", "boxes": [[229, 63, 318, 121], [757, 88, 820, 162], [85, 199, 102, 213], [277, 207, 292, 220], [710, 222, 723, 233], [275, 202, 713, 318], [857, 284, 1057, 362], [1, 75, 1043, 532], [0, 193, 1045, 534], [551, 425, 571, 443]], "scores": [0.6162660121917725, 0.5804665088653564, 0.22544649243354797, 0.3567067086696625, 0.34806376695632935, 0.2049751877784729, 0.29660093784332275, 0.23727966845035553, 0.2552465796470642, 0.20237919688224792], "labels": ["flag", "flag", "flag", "flag", "flag", "sign", "building", "building", "building", "sign"]}, {"id": "VD_video_2_2_1_1", "boxes": [[160, 23, 173, 37], [355, 6, 379, 58], [475, 32, 507, 90], [286, 42, 319, 82], [387, 30, 452, 101], [151, 41, 197, 107], [193, 41, 231, 111], [65, 63, 74, 79], [85, 67, 96, 80], [134, 54, 248, 127], [281, 43, 350, 115], [262, 42, 375, 128], [418, 69, 428, 79], [390, 38, 505, 127], [3, 6, 509, 130], [270, 66, 304, 127], [56, 61, 94, 127], [467, 78, 503, 126], [76, 78, 122, 125], [273, 67, 330, 125], [273, 67, 349, 124], [38, 72, 63, 126], [212, 99, 222, 109], [5, 76, 121, 126], [134, 87, 171, 126], [26, 119, 35, 125]], "scores": [0.4007842242717743, 0.26124730706214905, 0.41473305225372314, 0.2626587152481079, 0.5071172714233398, 0.4089081287384033, 0.3968779742717743, 0.24098904430866241, 0.35128286480903625, 0.2604479491710663, 0.28155073523521423, 0.21293513476848602, 0.6992228031158447, 0.25423482060432434, 0.27064844965934753, 0.23352737724781036, 0.40800169110298157, 0.44306302070617676, 0.20243123173713684, 0.3792933523654938, 0.382467120885849, 0.42971402406692505, 0.22330676019191742, 0.23211327195167542, 0.34798312187194824, 0.2687023878097534], "labels": ["basketball", "basketball player", "basketball player", "player", "player", "player", "basketball player", "basketball", "basketball", "playingfield", "player", "playingfield", "basketball", "playingfield", "image", "player", "basketball player", "player", "playingfield", "basketball player", "player", "basketball player", "basketball", "playingfield", "basketball player", "basketball"]}, {"id": "VD_video_1_19_0_2", "boxes": [[261, 76, 472, 538], [1192, 90, 1538, 543], [3280, 106, 3401, 487], [2190, 87, 2665, 535], [2091, 360, 2804, 538], [2862, 322, 3839, 537]], "scores": [0.21436624228954315, 0.20492792129516602, 0.28767725825309753, 0.21343989670276642, 0.2749791741371155, 0.409064382314682], "labels": ["man", "man", "man", "man", "arena", "arena"]}, {"id": "VS_chart_2_10_2_3", "boxes": [[351, 25, 599, 41], [31, 19, 637, 309], [45, 30, 368, 294], [378, 76, 595, 209], [562, 159, 569, 169], [593, 192, 599, 202], [2, -1, 676, 417], [421, 345, 436, 365], [26, 340, 578, 409]], "scores": [0.20262987911701202, 0.36390820145606995, 0.4472777247428894, 0.4253298044204712, 0.24382619559764862, 0.29275593161582947, 0.27370691299438477, 0.2088310867547989, 0.37130486965179443], "labels": ["text", "graph", "circle", "text", "number", "number", "graph", "number", "text"]}, {"id": "VS_table_0_6_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_5_1_3", "boxes": [[2, -3, 926, 611], [15, 95, 874, 571]], "scores": [0.7825090289115906, 0.20423205196857452], "labels": ["map", "map"]}, {"id": "VD_illusion_1_2_0_2", "boxes": [[37, 19, 109, 101], [787, 33, 857, 119], [51, 21, 865, 120], [16, 276, 89, 370], [22, 274, 963, 369], [870, 269, 946, 352]], "scores": [0.21531696617603302, 0.2525463104248047, 0.4610448479652405, 0.21393299102783203, 0.36978569626808167, 0.23460133373737335], "labels": ["point", "arrow", "arrow", "point", "arrow", "point"]}, {"id": "VD_math_1_2_0_0", "boxes": [[9, 18, 49, 62], [24, 19, 1003, 650]], "scores": [0.23335930705070496, 0.8001970648765564], "labels": ["triangle", "triangle"]}, {"id": "VD_ocr_1_10_0_1", "boxes": [[229, 63, 318, 121], [757, 88, 820, 162], [85, 199, 102, 213], [277, 207, 292, 220], [710, 222, 723, 233], [275, 202, 713, 318], [857, 284, 1057, 362], [1, 75, 1043, 532], [0, 193, 1045, 534], [551, 425, 571, 443]], "scores": [0.6162660121917725, 0.5804665088653564, 0.22544649243354797, 0.3567067086696625, 0.34806376695632935, 0.2049751877784729, 0.29660093784332275, 0.23727966845035553, 0.2552465796470642, 0.20237919688224792], "labels": ["flag", "flag", "flag", "flag", "flag", "sign", "building", "building", "building", "sign"]}, {"id": "VD_video_2_2_1_2", "boxes": [[160, 23, 173, 37], [355, 6, 379, 58], [475, 32, 507, 90], [286, 42, 319, 82], [387, 30, 452, 101], [151, 41, 197, 107], [193, 41, 231, 111], [65, 63, 74, 79], [85, 67, 96, 80], [134, 54, 248, 127], [281, 43, 350, 115], [262, 42, 375, 128], [418, 69, 428, 79], [390, 38, 505, 127], [3, 6, 509, 130], [270, 66, 304, 127], [56, 61, 94, 127], [467, 78, 503, 126], [76, 78, 122, 125], [273, 67, 330, 125], [273, 67, 349, 124], [38, 72, 63, 126], [212, 99, 222, 109], [5, 76, 121, 126], [134, 87, 171, 126], [26, 119, 35, 125]], "scores": [0.4007842242717743, 0.26124730706214905, 0.41473305225372314, 0.2626587152481079, 0.5071172714233398, 0.4089081287384033, 0.3968779742717743, 0.24098904430866241, 0.35128286480903625, 0.2604479491710663, 0.28155073523521423, 0.21293513476848602, 0.6992228031158447, 0.25423482060432434, 0.27064844965934753, 0.23352737724781036, 0.40800169110298157, 0.44306302070617676, 0.20243123173713684, 0.3792933523654938, 0.382467120885849, 0.42971402406692505, 0.22330676019191742, 0.23211327195167542, 0.34798312187194824, 0.2687023878097534], "labels": ["basketball", "basketball player", "basketball player", "player", "player", "player", "basketball player", "basketball", "basketball", "playingfield", "player", "playingfield", "basketball", "playingfield", "image", "player", "basketball player", "player", "playingfield", "basketball player", "player", "basketball player", "basketball", "playingfield", "basketball player", "basketball"]}, {"id": "VD_video_1_19_0_3", "boxes": [[261, 76, 472, 538], [1192, 90, 1538, 543], [3280, 106, 3401, 487], [2190, 87, 2665, 535], [2091, 360, 2804, 538], [2862, 322, 3839, 537]], "scores": [0.21436624228954315, 0.20492792129516602, 0.28767725825309753, 0.21343989670276642, 0.2749791741371155, 0.409064382314682], "labels": ["man", "man", "man", "man", "arena", "arena"]}, {"id": "VS_chart_0_11_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_6_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_5_2_0", "boxes": [[2, -3, 926, 611], [15, 95, 875, 571]], "scores": [0.7825810313224792, 0.20243880152702332], "labels": ["map", "map"]}, {"id": "VD_illusion_2_2_1_0", "boxes": [[38, 9, 108, 99], [1004, 14, 1077, 95], [49, 9, 1100, 104], [17, 260, 902, 360], [825, 256, 895, 336]], "scores": [0.2188660055398941, 0.2702755331993103, 0.3973764181137085, 0.3494572937488556, 0.2171633541584015], "labels": ["point", "point", "arrow", "arrow", "point"]}, {"id": "VD_math_1_2_0_1", "boxes": [[9, 18, 49, 62], [24, 19, 1003, 650]], "scores": [0.23335930705070496, 0.8001970648765564], "labels": ["triangle", "triangle"]}, {"id": "VD_ocr_2_10_1_0", "boxes": [[245, 63, 340, 124], [796, 90, 861, 167], [296, 214, 310, 225], [746, 229, 760, 241], [902, 294, 1111, 374], [4, 284, 1097, 559], [5, 69, 1095, 556]], "scores": [0.6159765720367432, 0.5928120613098145, 0.3808382451534271, 0.35194069147109985, 0.2785181403160095, 0.20150424540042877, 0.2796030640602112], "labels": ["flag", "flag", "flag", "flag", "building", "building", "building"]}, {"id": "VD_video_1_3_0_0", "boxes": [[462, 7, 507, 143], [388, 9, 435, 171], [10, 7, 125, 165], [350, 8, 388, 171], [7, 5, 164, 168], [381, 7, 504, 169], [426, 48, 462, 96], [9, 47, 125, 166], [407, 40, 507, 165], [178, 7, 236, 166], [178, 52, 236, 165], [282, 49, 371, 165], [182, 102, 237, 165], [349, 89, 375, 165], [282, 107, 333, 166], [409, 101, 507, 164], [3, 0, 508, 168]], "scores": [0.29469868540763855, 0.3287416696548462, 0.3445345163345337, 0.250554621219635, 0.2150716632604599, 0.23214052617549896, 0.2105013132095337, 0.35662174224853516, 0.31605368852615356, 0.3400357961654663, 0.3490929901599884, 0.26491308212280273, 0.20095038414001465, 0.23175424337387085, 0.2814050614833832, 0.3487337529659271, 0.25352245569229126], "labels": ["curtain", "curtain", "man", "curtain", "image", "curtain", "face", "shirt", "man", "man", "shirt", "man", "shirt", "shirt", "shirt", "shirt", "image"]}, {"id": "VD_video_2_19_1_0", "boxes": [[12, 24, 989, 410], [418, 103, 554, 481], [2926, 53, 3854, 445], [3133, 75, 3345, 535], [3308, 163, 3796, 215], [3656, 161, 3817, 432], [1214, 86, 1737, 536], [2142, 82, 2441, 532], [3019, 378, 3837, 534], [-7, 315, 954, 537], [975, 365, 1907, 539]], "scores": [0.260258287191391, 0.28917860984802246, 0.22192728519439697, 0.2358958125114441, 0.2087588757276535, 0.20619596540927887, 0.2283080816268921, 0.20614100992679596, 0.26817992329597473, 0.42232850193977356, 0.3854024112224579], "labels": ["person", "man", "person", "person", "pole", "person", "man", "man", "arena", "arena", "arena"]}, {"id": "VS_chart_0_11_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_6_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_5_2_1", "boxes": [[2, -3, 926, 611], [15, 95, 875, 571]], "scores": [0.7825810313224792, 0.20243880152702332], "labels": ["map", "map"]}, {"id": "VD_illusion_2_2_1_1", "boxes": [[38, 9, 108, 99], [1004, 14, 1077, 95], [49, 9, 1100, 104], [17, 260, 902, 360], [825, 256, 895, 336]], "scores": [0.2188660055398941, 0.2702755331993103, 0.3973764181137085, 0.3494572937488556, 0.2171633541584015], "labels": ["point", "point", "arrow", "arrow", "point"]}, {"id": "VD_math_1_2_0_2", "boxes": [[9, 18, 49, 62], [24, 19, 1003, 650]], "scores": [0.23335930705070496, 0.8001970648765564], "labels": ["triangle", "triangle"]}, {"id": "VD_ocr_2_10_1_1", "boxes": [[245, 63, 340, 124], [796, 90, 861, 167], [296, 214, 310, 225], [746, 229, 760, 241], [902, 294, 1111, 374], [4, 284, 1097, 559], [5, 69, 1095, 556]], "scores": [0.6159765720367432, 0.5928120613098145, 0.3808382451534271, 0.35194069147109985, 0.2785181403160095, 0.20150424540042877, 0.2796030640602112], "labels": ["flag", "flag", "flag", "flag", "building", "building", "building"]}, {"id": "VD_video_1_3_0_1", "boxes": [[462, 7, 507, 143], [388, 9, 435, 171], [10, 7, 125, 165], [350, 8, 388, 171], [7, 5, 164, 168], [381, 7, 504, 169], [426, 48, 462, 96], [9, 47, 125, 166], [407, 40, 507, 165], [178, 7, 236, 166], [178, 52, 236, 165], [282, 49, 371, 165], [182, 102, 237, 165], [349, 89, 375, 165], [282, 107, 333, 166], [409, 101, 507, 164], [3, 0, 508, 168]], "scores": [0.29469868540763855, 0.3287416696548462, 0.3445345163345337, 0.250554621219635, 0.2150716632604599, 0.23214052617549896, 0.2105013132095337, 0.35662174224853516, 0.31605368852615356, 0.3400357961654663, 0.3490929901599884, 0.26491308212280273, 0.20095038414001465, 0.23175424337387085, 0.2814050614833832, 0.3487337529659271, 0.25352245569229126], "labels": ["curtain", "curtain", "man", "curtain", "image", "curtain", "face", "shirt", "man", "man", "shirt", "man", "shirt", "shirt", "shirt", "shirt", "image"]}, {"id": "VD_video_2_19_1_1", "boxes": [[12, 24, 989, 410], [418, 103, 554, 481], [2926, 53, 3854, 445], [3133, 75, 3345, 535], [3308, 163, 3796, 215], [3656, 161, 3817, 432], [1214, 86, 1737, 536], [2142, 82, 2441, 532], [3019, 378, 3837, 534], [-7, 315, 954, 537], [975, 365, 1907, 539]], "scores": [0.260258287191391, 0.28917860984802246, 0.22192728519439697, 0.2358958125114441, 0.2087588757276535, 0.20619596540927887, 0.2283080816268921, 0.20614100992679596, 0.26817992329597473, 0.42232850193977356, 0.3854024112224579], "labels": ["person", "man", "person", "person", "pole", "person", "man", "man", "arena", "arena", "arena"]}, {"id": "VS_chart_1_11_1_0", "boxes": [[7, 1, 1455, 1250], [135, 282, 1339, 295], [151, 146, 1346, 1105], [133, 413, 1333, 425], [36, 124, 1367, 1127], [142, 548, 1332, 561], [171, 256, 1338, 1069], [145, 810, 1349, 824], [147, 945, 1346, 959], [110, 1075, 1360, 1093]], "scores": [0.5434068441390991, 0.21315637230873108, 0.37497952580451965, 0.2113989144563675, 0.22320204973220825, 0.2760990560054779, 0.47583988308906555, 0.27556565403938293, 0.24592739343643188, 0.3008579909801483], "labels": ["graph", "line", "graph", "line", "graph", "line", "graph", "line", "line", "line"]}, {"id": "VS_table_1_6_1_0", "boxes": [[217, 119, 1124, 158], [987, 280, 1005, 306], [128, 185, 1250, 619], [936, 353, 948, 378], [986, 352, 1004, 377], [936, 423, 948, 447], [313, 213, 463, 597], [929, 494, 948, 521], [991, 494, 1010, 520], [942, 563, 999, 592], [977, 564, 997, 591]], "scores": [0.3531605303287506, 0.23156507313251495, 0.2487548291683197, 0.20898395776748657, 0.21267488598823547, 0.21406757831573486, 0.2578071355819702, 0.23073628544807434, 0.2992226183414459, 0.21012482047080994, 0.21066060662269592], "labels": ["text", "number", "text", "number", "number", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_map_2_5_2_2", "boxes": [[2, -3, 926, 611], [15, 95, 875, 571]], "scores": [0.7825810313224792, 0.20243880152702332], "labels": ["map", "map"]}, {"id": "VD_illusion_2_2_1_2", "boxes": [[38, 9, 108, 99], [1004, 14, 1077, 95], [49, 9, 1100, 104], [17, 260, 902, 360], [825, 256, 895, 336]], "scores": [0.2188660055398941, 0.2702755331993103, 0.3973764181137085, 0.3494572937488556, 0.2171633541584015], "labels": ["point", "point", "arrow", "arrow", "point"]}, {"id": "VD_math_2_2_1_0", "boxes": [[36, 19, 873, 601]], "scores": [0.6197885870933533], "labels": ["triangle"]}, {"id": "VD_ocr_1_11_0_0", "boxes": [[0, 0, 196, 168], [1, 6, 228, 501], [2, 38, 877, 475], [1, 300, 190, 504], [0, 303, 366, 505], [236, 410, 365, 504], [532, 371, 753, 504]], "scores": [0.25719210505485535, 0.2884848415851593, 0.6661131381988525, 0.40401527285575867, 0.22337940335273743, 0.3231268525123596, 0.4925535023212433], "labels": ["palm tree", "palm tree", "sign", "palm tree", "palm tree", "palm tree", "palm tree"]}, {"id": "VD_video_1_3_0_2", "boxes": [[462, 7, 507, 143], [388, 9, 435, 171], [10, 7, 125, 165], [350, 8, 388, 171], [7, 5, 164, 168], [381, 7, 504, 169], [426, 48, 462, 96], [9, 47, 125, 166], [407, 40, 507, 165], [178, 7, 236, 166], [178, 52, 236, 165], [282, 49, 371, 165], [182, 102, 237, 165], [349, 89, 375, 165], [282, 107, 333, 166], [409, 101, 507, 164], [3, 0, 508, 168]], "scores": [0.29469868540763855, 0.3287416696548462, 0.3445345163345337, 0.250554621219635, 0.2150716632604599, 0.23214052617549896, 0.2105013132095337, 0.35662174224853516, 0.31605368852615356, 0.3400357961654663, 0.3490929901599884, 0.26491308212280273, 0.20095038414001465, 0.23175424337387085, 0.2814050614833832, 0.3487337529659271, 0.25352245569229126], "labels": ["curtain", "curtain", "man", "curtain", "image", "curtain", "face", "shirt", "man", "man", "shirt", "man", "shirt", "shirt", "shirt", "shirt", "image"]}, {"id": "VD_video_2_19_1_2", "boxes": [[12, 24, 989, 410], [418, 103, 554, 481], [2926, 53, 3854, 445], [3133, 75, 3345, 535], [3308, 163, 3796, 215], [3656, 161, 3817, 432], [1214, 86, 1737, 536], [2142, 82, 2441, 532], [3019, 378, 3837, 534], [-7, 315, 954, 537], [975, 365, 1907, 539]], "scores": [0.260258287191391, 0.28917860984802246, 0.22192728519439697, 0.2358958125114441, 0.2087588757276535, 0.20619596540927887, 0.2283080816268921, 0.20614100992679596, 0.26817992329597473, 0.42232850193977356, 0.3854024112224579], "labels": ["person", "man", "person", "person", "pole", "person", "man", "man", "arena", "arena", "arena"]}, {"id": "VS_chart_1_11_1_1", "boxes": [[7, 1, 1455, 1250], [135, 282, 1339, 295], [151, 146, 1346, 1105], [133, 413, 1333, 425], [36, 124, 1367, 1127], [142, 548, 1332, 561], [171, 256, 1338, 1069], [145, 810, 1349, 824], [147, 945, 1346, 959], [110, 1075, 1360, 1093]], "scores": [0.5434068441390991, 0.21315637230873108, 0.37497952580451965, 0.2113989144563675, 0.22320204973220825, 0.2760990560054779, 0.47583988308906555, 0.27556565403938293, 0.24592739343643188, 0.3008579909801483], "labels": ["graph", "line", "graph", "line", "graph", "line", "graph", "line", "line", "line"]}, {"id": "VS_table_1_6_1_1", "boxes": [[217, 119, 1124, 158], [987, 280, 1005, 306], [128, 185, 1250, 619], [936, 353, 948, 378], [986, 352, 1004, 377], [936, 423, 948, 447], [313, 213, 463, 597], [929, 494, 948, 521], [991, 494, 1010, 520], [942, 563, 999, 592], [977, 564, 997, 591]], "scores": [0.3531605303287506, 0.23156507313251495, 0.2487548291683197, 0.20898395776748657, 0.21267488598823547, 0.21406757831573486, 0.2578071355819702, 0.23073628544807434, 0.2992226183414459, 0.21012482047080994, 0.21066060662269592], "labels": ["text", "number", "text", "number", "number", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_map_2_5_2_3", "boxes": [[2, -3, 926, 611], [15, 95, 875, 571]], "scores": [0.7825810313224792, 0.20243880152702332], "labels": ["map", "map"]}, {"id": "VD_illusion_1_3_0_0", "boxes": [[88, 9, 708, 571]], "scores": [0.691744327545166], "labels": ["ladder"]}, {"id": "VD_math_2_2_1_1", "boxes": [[36, 19, 873, 601]], "scores": [0.6197885870933533], "labels": ["triangle"]}, {"id": "VD_ocr_1_11_0_1", "boxes": [[0, 0, 196, 168], [1, 6, 228, 501], [2, 38, 877, 475], [1, 300, 190, 504], [0, 303, 366, 505], [236, 410, 365, 504], [532, 371, 753, 504]], "scores": [0.25719210505485535, 0.2884848415851593, 0.6661131381988525, 0.40401527285575867, 0.22337940335273743, 0.3231268525123596, 0.4925535023212433], "labels": ["palm tree", "palm tree", "sign", "palm tree", "palm tree", "palm tree", "palm tree"]}, {"id": "VD_video_1_3_0_3", "boxes": [[462, 7, 507, 143], [388, 9, 435, 171], [10, 7, 125, 165], [350, 8, 388, 171], [7, 5, 164, 168], [381, 7, 504, 169], [426, 48, 462, 96], [9, 47, 125, 166], [407, 40, 507, 165], [178, 7, 236, 166], [178, 52, 236, 165], [282, 49, 371, 165], [182, 102, 237, 165], [349, 89, 375, 165], [282, 107, 333, 166], [409, 101, 507, 164], [3, 0, 508, 168]], "scores": [0.29469868540763855, 0.3287416696548462, 0.3445345163345337, 0.250554621219635, 0.2150716632604599, 0.23214052617549896, 0.2105013132095337, 0.35662174224853516, 0.31605368852615356, 0.3400357961654663, 0.3490929901599884, 0.26491308212280273, 0.20095038414001465, 0.23175424337387085, 0.2814050614833832, 0.3487337529659271, 0.25352245569229126], "labels": ["curtain", "curtain", "man", "curtain", "image", "curtain", "face", "shirt", "man", "man", "shirt", "man", "shirt", "shirt", "shirt", "shirt", "image"]}, {"id": "VD_video_2_19_1_3", "boxes": [[12, 24, 989, 410], [418, 103, 554, 481], [2926, 53, 3854, 445], [3133, 75, 3345, 535], [3308, 163, 3796, 215], [3656, 161, 3817, 432], [1214, 86, 1737, 536], [2142, 82, 2441, 532], [3019, 378, 3837, 534], [-7, 315, 954, 537], [975, 365, 1907, 539]], "scores": [0.260258287191391, 0.28917860984802246, 0.22192728519439697, 0.2358958125114441, 0.2087588757276535, 0.20619596540927887, 0.2283080816268921, 0.20614100992679596, 0.26817992329597473, 0.42232850193977356, 0.3854024112224579], "labels": ["person", "man", "person", "person", "pole", "person", "man", "man", "arena", "arena", "arena"]}, {"id": "VS_chart_0_12_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_6_1_2", "boxes": [[217, 119, 1124, 158], [987, 280, 1005, 306], [128, 185, 1250, 619], [936, 353, 948, 378], [986, 352, 1004, 377], [936, 423, 948, 447], [313, 213, 463, 597], [929, 494, 948, 521], [991, 494, 1010, 520], [942, 563, 999, 592], [977, 564, 997, 591]], "scores": [0.3531605303287506, 0.23156507313251495, 0.2487548291683197, 0.20898395776748657, 0.21267488598823547, 0.21406757831573486, 0.2578071355819702, 0.23073628544807434, 0.2992226183414459, 0.21012482047080994, 0.21066060662269592], "labels": ["text", "number", "text", "number", "number", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_map_0_6_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_3_0_1", "boxes": [[88, 9, 708, 571]], "scores": [0.691744327545166], "labels": ["ladder"]}, {"id": "VD_math_2_2_1_2", "boxes": [[36, 19, 873, 601]], "scores": [0.6197885870933533], "labels": ["triangle"]}, {"id": "VD_ocr_2_11_1_0", "boxes": [[12, 8, 286, 243], [1203, 11, 1262, 246], [8, 17, 320, 710], [18, 64, 1225, 673], [12, 434, 158, 714], [98, 488, 272, 714], [13, 432, 280, 713], [15, 412, 534, 712], [342, 588, 528, 713], [750, 529, 1059, 711]], "scores": [0.3195142447948456, 0.21720051765441895, 0.22215263545513153, 0.4849699139595032, 0.21051228046417236, 0.20313823223114014, 0.4072941839694977, 0.24958978593349457, 0.35528427362442017, 0.4645598232746124], "labels": ["palm tree", "palm tree", "palm tree", "sign", "palm tree", "palm tree", "palm tree", "palm tree", "palm tree", "palm tree"]}, {"id": "VD_video_2_3_1_0", "boxes": [[119, 12, 162, 137], [45, 12, 159, 172], [47, 12, 95, 172], [6, 5, 500, 171], [346, 12, 459, 167], [347, 54, 459, 168], [66, 44, 162, 167], [175, 12, 234, 168], [277, 55, 330, 169], [175, 57, 234, 168], [6, 89, 35, 168], [278, 111, 330, 167], [67, 104, 162, 165]], "scores": [0.22879736125469208, 0.2521636486053467, 0.26563432812690735, 0.21189701557159424, 0.3370260000228882, 0.32930728793144226, 0.2986932396888733, 0.3098801374435425, 0.25466230511665344, 0.31994614005088806, 0.21072795987129211, 0.29537463188171387, 0.35199591517448425], "labels": ["curtain", "curtain", "curtain", "image", "man", "shirt", "man", "man", "person", "shirt", "person", "shirt", "shirt"]}, {"id": "VD_video_2_19_2_0", "boxes": [[54, 50, 880, 494], [16, 30, 1898, 501], [962, 35, 1919, 438], [267, 82, 472, 544], [1372, 117, 1505, 494], [798, 167, 954, 445], [936, 314, 1904, 543], [2010, 370, 2769, 542], [3053, 395, 3826, 538]], "scores": [0.2509373128414154, 0.238901287317276, 0.2174869179725647, 0.2623324990272522, 0.258409321308136, 0.26290130615234375, 0.3696693778038025, 0.3087685704231262, 0.24782009422779083], "labels": ["person", "person", "person", "person", "person", "person", "arena", "arena", "arena"]}, {"id": "VS_chart_0_12_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_6_1_3", "boxes": [[217, 119, 1124, 158], [987, 280, 1005, 306], [128, 185, 1250, 619], [936, 353, 948, 378], [986, 352, 1004, 377], [936, 423, 948, 447], [313, 213, 463, 597], [929, 494, 948, 521], [991, 494, 1010, 520], [942, 563, 999, 592], [977, 564, 997, 591]], "scores": [0.3531605303287506, 0.23156507313251495, 0.2487548291683197, 0.20898395776748657, 0.21267488598823547, 0.21406757831573486, 0.2578071355819702, 0.23073628544807434, 0.2992226183414459, 0.21012482047080994, 0.21066060662269592], "labels": ["text", "number", "text", "number", "number", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_map_0_6_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_3_0_2", "boxes": [[88, 9, 708, 571]], "scores": [0.691744327545166], "labels": ["ladder"]}, {"id": "VD_math_1_3_0_0", "boxes": [[0, 0, 784, 484], [39, 159, 723, 232], [298, 39, 487, 488], [29, 42, 762, 480], [39, 319, 723, 392]], "scores": [0.25893160700798035, 0.31487971544265747, 0.21064606308937073, 0.2872960865497589, 0.3521564304828644], "labels": ["angle", "line", "line", "angle", "line"]}, {"id": "VD_ocr_2_11_1_1", "boxes": [[12, 8, 286, 243], [1203, 11, 1262, 246], [8, 17, 320, 710], [18, 64, 1225, 673], [12, 434, 158, 714], [98, 488, 272, 714], [13, 432, 280, 713], [15, 412, 534, 712], [342, 588, 528, 713], [750, 529, 1059, 711]], "scores": [0.3195142447948456, 0.21720051765441895, 0.22215263545513153, 0.4849699139595032, 0.21051228046417236, 0.20313823223114014, 0.4072941839694977, 0.24958978593349457, 0.35528427362442017, 0.4645598232746124], "labels": ["palm tree", "palm tree", "palm tree", "sign", "palm tree", "palm tree", "palm tree", "palm tree", "palm tree", "palm tree"]}, {"id": "VD_video_2_3_1_1", "boxes": [[119, 12, 162, 137], [45, 12, 159, 172], [47, 12, 95, 172], [6, 5, 500, 171], [346, 12, 459, 167], [347, 54, 459, 168], [66, 44, 162, 167], [175, 12, 234, 168], [277, 55, 330, 169], [175, 57, 234, 168], [6, 89, 35, 168], [278, 111, 330, 167], [67, 104, 162, 165]], "scores": [0.22879736125469208, 0.2521636486053467, 0.26563432812690735, 0.21189701557159424, 0.3370260000228882, 0.32930728793144226, 0.2986932396888733, 0.3098801374435425, 0.25466230511665344, 0.31994614005088806, 0.21072795987129211, 0.29537463188171387, 0.35199591517448425], "labels": ["curtain", "curtain", "curtain", "image", "man", "shirt", "man", "man", "person", "shirt", "person", "shirt", "shirt"]}, {"id": "VD_video_2_19_2_1", "boxes": [[54, 50, 880, 494], [16, 30, 1898, 501], [962, 35, 1919, 438], [267, 82, 472, 544], [1372, 117, 1505, 494], [798, 167, 954, 445], [936, 314, 1904, 543], [2010, 370, 2769, 542], [3053, 395, 3826, 538]], "scores": [0.2509373128414154, 0.238901287317276, 0.2174869179725647, 0.2623324990272522, 0.258409321308136, 0.26290130615234375, 0.3696693778038025, 0.3087685704231262, 0.24782009422779083], "labels": ["person", "person", "person", "person", "person", "person", "arena", "arena", "arena"]}, {"id": "VS_chart_0_12_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_6_2_0", "boxes": [[898, 228, 916, 254], [951, 228, 969, 254], [900, 300, 912, 325], [952, 300, 969, 325], [901, 371, 912, 396], [934, 371, 951, 396], [952, 371, 969, 396], [893, 441, 912, 468], [956, 442, 974, 468], [941, 512, 961, 538], [908, 511, 964, 540]], "scores": [0.2129572182893753, 0.2651890218257904, 0.20842456817626953, 0.21338486671447754, 0.253712922334671, 0.20692847669124603, 0.22053101658821106, 0.22431083023548126, 0.3168787360191345, 0.22029146552085876, 0.21074049174785614], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_6_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_3_1_0", "boxes": [[86, 8, 653, 537]], "scores": [0.8439432978630066], "labels": ["ladder"]}, {"id": "VD_math_1_3_0_1", "boxes": [[0, 0, 784, 484], [39, 159, 723, 232], [298, 39, 487, 488], [29, 42, 762, 480], [39, 319, 723, 392]], "scores": [0.25893160700798035, 0.31487971544265747, 0.21064606308937073, 0.2872960865497589, 0.3521564304828644], "labels": ["angle", "line", "line", "angle", "line"]}, {"id": "VD_ocr_1_12_0_0", "boxes": [[1, 2, 394, 565], [86, 123, 139, 162], [4, 101, 332, 427], [78, 231, 392, 412], [8, 229, 392, 527]], "scores": [0.7051349878311157, 0.6085517406463623, 0.23755501210689545, 0.3226277828216553, 0.7245868444442749], "labels": ["poster", "microphone", "man", "guitar", "guitar"]}, {"id": "VD_video_2_3_1_2", "boxes": [[119, 12, 162, 137], [45, 12, 159, 172], [47, 12, 95, 172], [6, 5, 500, 171], [346, 12, 459, 167], [347, 54, 459, 168], [66, 44, 162, 167], [175, 12, 234, 168], [277, 55, 330, 169], [175, 57, 234, 168], [6, 89, 35, 168], [278, 111, 330, 167], [67, 104, 162, 165]], "scores": [0.22879736125469208, 0.2521636486053467, 0.26563432812690735, 0.21189701557159424, 0.3370260000228882, 0.32930728793144226, 0.2986932396888733, 0.3098801374435425, 0.25466230511665344, 0.31994614005088806, 0.21072795987129211, 0.29537463188171387, 0.35199591517448425], "labels": ["curtain", "curtain", "curtain", "image", "man", "shirt", "man", "man", "person", "shirt", "person", "shirt", "shirt"]}, {"id": "VD_video_2_19_2_2", "boxes": [[54, 50, 880, 494], [16, 30, 1898, 501], [962, 35, 1919, 438], [267, 82, 472, 544], [1372, 117, 1505, 494], [798, 167, 954, 445], [936, 314, 1904, 543], [2010, 370, 2769, 542], [3053, 395, 3826, 538]], "scores": [0.2509373128414154, 0.238901287317276, 0.2174869179725647, 0.2623324990272522, 0.258409321308136, 0.26290130615234375, 0.3696693778038025, 0.3087685704231262, 0.24782009422779083], "labels": ["person", "person", "person", "person", "person", "person", "arena", "arena", "arena"]}, {"id": "VS_chart_1_12_1_0", "boxes": [[6, 5, 1426, 1253], [57, 318, 80, 354], [132, 335, 1336, 346], [196, 204, 1350, 591], [135, 508, 1337, 519], [169, 158, 1367, 1024], [144, 679, 1347, 692], [175, 877, 1356, 925], [71, 190, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5348394513130188, 0.20173124969005585, 0.2983388900756836, 0.29035916924476624, 0.27171432971954346, 0.3974834978580475, 0.2667800486087799, 0.21900801360607147, 0.2889283299446106, 0.30039799213409424, 0.25966429710388184], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_2_1", "boxes": [[898, 228, 916, 254], [951, 228, 969, 254], [900, 300, 912, 325], [952, 300, 969, 325], [901, 371, 912, 396], [934, 371, 951, 396], [952, 371, 969, 396], [893, 441, 912, 468], [956, 442, 974, 468], [941, 512, 961, 538], [908, 511, 964, 540]], "scores": [0.2129572182893753, 0.2651890218257904, 0.20842456817626953, 0.21338486671447754, 0.253712922334671, 0.20692847669124603, 0.22053101658821106, 0.22431083023548126, 0.3168787360191345, 0.22029146552085876, 0.21074049174785614], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_6_1_0", "boxes": [[-38, -10, 1620, 1982], [-23, 286, 1586, 1880]], "scores": [0.3126941919326782, 0.40352490544319153], "labels": ["map", "map"]}, {"id": "VD_illusion_2_3_1_1", "boxes": [[86, 8, 653, 537]], "scores": [0.8439432978630066], "labels": ["ladder"]}, {"id": "VD_math_1_3_0_2", "boxes": [[0, 0, 784, 484], [39, 159, 723, 232], [298, 39, 487, 488], [29, 42, 762, 480], [39, 319, 723, 392]], "scores": [0.25893160700798035, 0.31487971544265747, 0.21064606308937073, 0.2872960865497589, 0.3521564304828644], "labels": ["angle", "line", "line", "angle", "line"]}, {"id": "VD_ocr_1_12_0_1", "boxes": [[1, 2, 394, 565], [86, 123, 139, 162], [4, 101, 332, 427], [78, 231, 392, 412], [8, 229, 392, 527]], "scores": [0.7051349878311157, 0.6085517406463623, 0.23755501210689545, 0.3226277828216553, 0.7245868444442749], "labels": ["poster", "microphone", "man", "guitar", "guitar"]}, {"id": "VD_video_2_3_1_3", "boxes": [[119, 12, 162, 137], [45, 12, 159, 172], [47, 12, 95, 172], [6, 5, 500, 171], [346, 12, 459, 167], [347, 54, 459, 168], [66, 44, 162, 167], [175, 12, 234, 168], [277, 55, 330, 169], [175, 57, 234, 168], [6, 89, 35, 168], [278, 111, 330, 167], [67, 104, 162, 165]], "scores": [0.22879736125469208, 0.2521636486053467, 0.26563432812690735, 0.21189701557159424, 0.3370260000228882, 0.32930728793144226, 0.2986932396888733, 0.3098801374435425, 0.25466230511665344, 0.31994614005088806, 0.21072795987129211, 0.29537463188171387, 0.35199591517448425], "labels": ["curtain", "curtain", "curtain", "image", "man", "shirt", "man", "man", "person", "shirt", "person", "shirt", "shirt"]}, {"id": "VD_video_2_19_2_3", "boxes": [[54, 50, 880, 494], [16, 30, 1898, 501], [962, 35, 1919, 438], [267, 82, 472, 544], [1372, 117, 1505, 494], [798, 167, 954, 445], [936, 314, 1904, 543], [2010, 370, 2769, 542], [3053, 395, 3826, 538]], "scores": [0.2509373128414154, 0.238901287317276, 0.2174869179725647, 0.2623324990272522, 0.258409321308136, 0.26290130615234375, 0.3696693778038025, 0.3087685704231262, 0.24782009422779083], "labels": ["person", "person", "person", "person", "person", "person", "arena", "arena", "arena"]}, {"id": "VS_chart_1_12_1_1", "boxes": [[6, 5, 1426, 1253], [57, 318, 80, 354], [132, 335, 1336, 346], [196, 204, 1350, 591], [135, 508, 1337, 519], [169, 158, 1367, 1024], [144, 679, 1347, 692], [175, 877, 1356, 925], [71, 190, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5348394513130188, 0.20173124969005585, 0.2983388900756836, 0.29035916924476624, 0.27171432971954346, 0.3974834978580475, 0.2667800486087799, 0.21900801360607147, 0.2889283299446106, 0.30039799213409424, 0.25966429710388184], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_2_2", "boxes": [[898, 228, 916, 254], [951, 228, 969, 254], [900, 300, 912, 325], [952, 300, 969, 325], [901, 371, 912, 396], [934, 371, 951, 396], [952, 371, 969, 396], [893, 441, 912, 468], [956, 442, 974, 468], [941, 512, 961, 538], [908, 511, 964, 540]], "scores": [0.2129572182893753, 0.2651890218257904, 0.20842456817626953, 0.21338486671447754, 0.253712922334671, 0.20692847669124603, 0.22053101658821106, 0.22431083023548126, 0.3168787360191345, 0.22029146552085876, 0.21074049174785614], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_6_1_1", "boxes": [[-38, -10, 1620, 1982], [-23, 286, 1586, 1880]], "scores": [0.3126941919326782, 0.40352490544319153], "labels": ["map", "map"]}, {"id": "VD_illusion_2_3_1_2", "boxes": [[86, 8, 653, 537]], "scores": [0.8439432978630066], "labels": ["ladder"]}, {"id": "VD_math_2_3_1_0", "boxes": [[-1, 0, 1669, 1060], [654, -4, 1097, 1065], [0, 285, 1632, 459], [1, 743, 1652, 772]], "scores": [0.3199988603591919, 0.20579646527767181, 0.2985258400440216, 0.3362507224082947], "labels": ["sky", "line", "line", "line"]}, {"id": "VD_ocr_2_12_1_0", "boxes": [[2, 2, 410, 589], [5, 107, 345, 454], [80, 241, 409, 432], [8, 242, 408, 552]], "scores": [0.6796286106109619, 0.2465408593416214, 0.39759835600852966, 0.7052668929100037], "labels": ["poster", "man", "guitar", "guitar"]}, {"id": "VD_video_2_3_2_0", "boxes": [[3, 2, 506, 168], [178, 9, 293, 166], [180, 50, 293, 166], [7, 56, 64, 167], [7, 8, 64, 165], [405, 42, 504, 166], [110, 52, 162, 167], [347, 89, 374, 165], [111, 108, 162, 166], [407, 101, 504, 164]], "scores": [0.22546899318695068, 0.3583371639251709, 0.29392725229263306, 0.2952915132045746, 0.31054797768592834, 0.32334262132644653, 0.27867940068244934, 0.24326126277446747, 0.29650643467903137, 0.3333670198917389], "labels": ["image", "man", "shirt", "shirt", "man", "man", "man", "person", "shirt", "shirt"]}, {"id": "VS_chart_0_0_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_12_1_2", "boxes": [[6, 5, 1426, 1253], [57, 318, 80, 354], [132, 335, 1336, 346], [196, 204, 1350, 591], [135, 508, 1337, 519], [169, 158, 1367, 1024], [144, 679, 1347, 692], [175, 877, 1356, 925], [71, 190, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5348394513130188, 0.20173124969005585, 0.2983388900756836, 0.29035916924476624, 0.27171432971954346, 0.3974834978580475, 0.2667800486087799, 0.21900801360607147, 0.2889283299446106, 0.30039799213409424, 0.25966429710388184], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_2_3", "boxes": [[898, 228, 916, 254], [951, 228, 969, 254], [900, 300, 912, 325], [952, 300, 969, 325], [901, 371, 912, 396], [934, 371, 951, 396], [952, 371, 969, 396], [893, 441, 912, 468], [956, 442, 974, 468], [941, 512, 961, 538], [908, 511, 964, 540]], "scores": [0.2129572182893753, 0.2651890218257904, 0.20842456817626953, 0.21338486671447754, 0.253712922334671, 0.20692847669124603, 0.22053101658821106, 0.22431083023548126, 0.3168787360191345, 0.22029146552085876, 0.21074049174785614], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_6_1_2", "boxes": [[-38, -10, 1620, 1982], [-23, 286, 1586, 1880]], "scores": [0.3126941919326782, 0.40352490544319153], "labels": ["map", "map"]}, {"id": "VD_illusion_1_4_0_0", "boxes": [[13, 5, 348, 204], [391, 3, 723, 198], [1167, 7, 1497, 206], [776, 6, 1109, 205], [13, 253, 347, 447], [773, 254, 1101, 445], [1165, 254, 1495, 446], [392, 254, 721, 445], [12, 497, 346, 693], [1165, 496, 1493, 688], [391, 496, 720, 690], [771, 495, 1100, 689]], "scores": [0.2987760901451111, 0.32511866092681885, 0.28079700469970703, 0.31018543243408203, 0.3323647379875183, 0.33187559247016907, 0.30931127071380615, 0.34041789174079895, 0.34955736994743347, 0.32968974113464355, 0.356137752532959, 0.3363784849643707], "labels": ["square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square", "square"]}, {"id": "VD_math_2_3_1_1", "boxes": [[-1, 0, 1669, 1060], [654, -4, 1097, 1065], [0, 285, 1632, 459], [1, 743, 1652, 772]], "scores": [0.3199988603591919, 0.20579646527767181, 0.2985258400440216, 0.3362507224082947], "labels": ["sky", "line", "line", "line"]}, {"id": "VD_ocr_2_12_1_1", "boxes": [[2, 2, 410, 589], [5, 107, 345, 454], [80, 241, 409, 432], [8, 242, 408, 552]], "scores": [0.6796286106109619, 0.2465408593416214, 0.39759835600852966, 0.7052668929100037], "labels": ["poster", "man", "guitar", "guitar"]}, {"id": "VD_video_2_3_2_1", "boxes": [[3, 2, 506, 168], [178, 9, 293, 166], [180, 50, 293, 166], [7, 56, 64, 167], [7, 8, 64, 165], [405, 42, 504, 166], [110, 52, 162, 167], [347, 89, 374, 165], [111, 108, 162, 166], [407, 101, 504, 164]], "scores": [0.22546899318695068, 0.3583371639251709, 0.29392725229263306, 0.2952915132045746, 0.31054797768592834, 0.32334262132644653, 0.27867940068244934, 0.24326126277446747, 0.29650643467903137, 0.3333670198917389], "labels": ["image", "man", "shirt", "shirt", "man", "man", "man", "person", "shirt", "shirt"]}, {"id": "VS_chart_0_0_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_12_2_0", "boxes": [[6, 5, 1426, 1252], [57, 318, 80, 354], [131, 335, 1336, 346], [196, 204, 1350, 591], [134, 508, 1336, 519], [167, 158, 1368, 1022], [144, 679, 1347, 692], [174, 877, 1356, 925], [71, 192, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5369575023651123, 0.201702281832695, 0.2985374629497528, 0.21863201260566711, 0.2732413709163666, 0.39116305112838745, 0.2667810618877411, 0.22070656716823578, 0.28949394822120667, 0.29911670088768005, 0.25824370980262756], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_3_0", "boxes": [[429, 257, 452, 319], [262, 259, 284, 315], [318, 260, 340, 314], [373, 260, 396, 314], [485, 267, 506, 308], [138, 140, 169, 856], [196, 67, 515, 954]], "scores": [0.2484678328037262, 0.2826550602912903, 0.24740952253341675, 0.20625779032707214, 0.318644255399704, 0.25321975350379944, 0.22125419974327087], "labels": ["number", "number", "number", "number", "number", "text", "text"]}, {"id": "VS_map_2_6_2_0", "boxes": [[-52, -2, 1634, 1645], [78, -2, 1694, 1480], [1106, 827, 1749, 911], [1106, 934, 1678, 1452]], "scores": [0.47148698568344116, 0.25282561779022217, 0.20258553326129913, 0.20691649615764618], "labels": ["map", "map", "line", "line"]}, {"id": "VD_illusion_2_4_1_0", "boxes": [[14, 4, 352, 200], [393, 0, 728, 196], [781, 6, 1120, 202], [1173, 6, 1512, 201], [3, -4, 1507, 701], [378, 199, 732, 255], [14, 252, 349, 447], [396, 254, 725, 445], [775, 253, 1112, 446], [1171, 253, 1510, 446], [25, 449, 1483, 503], [13, 497, 349, 695], [776, 495, 1111, 690], [1173, 495, 1510, 687], [394, 496, 724, 693]], "scores": [0.33752351999282837, 0.3641536831855774, 0.32888442277908325, 0.30097565054893494, 0.462733656167984, 0.22304598987102509, 0.3536578118801117, 0.36341801285743713, 0.36811363697052, 0.35383057594299316, 0.2816299498081207, 0.36957991123199463, 0.36742350459098816, 0.3541624844074249, 0.3746086359024048], "labels": ["square", "square", "square", "square", "grid", "rectangle", "square", "square", "rectangle", "rectangle", "line", "square", "rectangle", "square", "square"]}, {"id": "VD_math_2_3_1_2", "boxes": [[-1, 0, 1669, 1060], [654, -4, 1097, 1065], [0, 285, 1632, 459], [1, 743, 1652, 772]], "scores": [0.3199988603591919, 0.20579646527767181, 0.2985258400440216, 0.3362507224082947], "labels": ["sky", "line", "line", "line"]}, {"id": "VD_ocr_1_13_0_0", "boxes": [[113, 185, 624, 356], [372, 210, 618, 345], [114, 212, 358, 343], [63, 185, 654, 533], [70, 399, 652, 544]], "scores": [0.20858794450759888, 0.5150080323219299, 0.5300670266151428, 0.47852203249931335, 0.322519451379776], "labels": ["logo", "bull", "bull", "logo", "brand"]}, {"id": "VD_video_2_3_2_2", "boxes": [[3, 2, 506, 168], [178, 9, 293, 166], [180, 50, 293, 166], [7, 56, 64, 167], [7, 8, 64, 165], [405, 42, 504, 166], [110, 52, 162, 167], [347, 89, 374, 165], [111, 108, 162, 166], [407, 101, 504, 164]], "scores": [0.22546899318695068, 0.3583371639251709, 0.29392725229263306, 0.2952915132045746, 0.31054797768592834, 0.32334262132644653, 0.27867940068244934, 0.24326126277446747, 0.29650643467903137, 0.3333670198917389], "labels": ["image", "man", "shirt", "shirt", "man", "man", "man", "person", "shirt", "shirt"]}, {"id": "VS_chart_0_0_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_12_2_1", "boxes": [[6, 5, 1426, 1252], [57, 318, 80, 354], [131, 335, 1336, 346], [196, 204, 1350, 591], [134, 508, 1336, 519], [167, 158, 1368, 1022], [144, 679, 1347, 692], [174, 877, 1356, 925], [71, 192, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5369575023651123, 0.201702281832695, 0.2985374629497528, 0.21863201260566711, 0.2732413709163666, 0.39116305112838745, 0.2667810618877411, 0.22070656716823578, 0.28949394822120667, 0.29911670088768005, 0.25824370980262756], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_3_1", "boxes": [[429, 257, 452, 319], [262, 259, 284, 315], [318, 260, 340, 314], [373, 260, 396, 314], [485, 267, 506, 308], [138, 140, 169, 856], [196, 67, 515, 954]], "scores": [0.2484678328037262, 0.2826550602912903, 0.24740952253341675, 0.20625779032707214, 0.318644255399704, 0.25321975350379944, 0.22125419974327087], "labels": ["number", "number", "number", "number", "number", "text", "text"]}, {"id": "VS_map_2_6_2_1", "boxes": [[-52, -2, 1634, 1645], [78, -2, 1694, 1480], [1106, 827, 1749, 911], [1106, 934, 1678, 1452]], "scores": [0.47148698568344116, 0.25282561779022217, 0.20258553326129913, 0.20691649615764618], "labels": ["map", "map", "line", "line"]}, {"id": "VD_illusion_1_5_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_4_0_0", "boxes": [[123, 114, 753, 596], [689, 525, 765, 598]], "scores": [0.26952481269836426, 0.4878329038619995], "labels": ["triangle", "square"]}, {"id": "VD_ocr_1_13_0_1", "boxes": [[113, 185, 624, 356], [372, 210, 618, 345], [114, 212, 358, 343], [63, 185, 654, 533], [70, 399, 652, 544]], "scores": [0.20858794450759888, 0.5150080323219299, 0.5300670266151428, 0.47852203249931335, 0.322519451379776], "labels": ["logo", "bull", "bull", "logo", "brand"]}, {"id": "VD_video_2_3_2_3", "boxes": [[3, 2, 506, 168], [178, 9, 293, 166], [180, 50, 293, 166], [7, 56, 64, 167], [7, 8, 64, 165], [405, 42, 504, 166], [110, 52, 162, 167], [347, 89, 374, 165], [111, 108, 162, 166], [407, 101, 504, 164]], "scores": [0.22546899318695068, 0.3583371639251709, 0.29392725229263306, 0.2952915132045746, 0.31054797768592834, 0.32334262132644653, 0.27867940068244934, 0.24326126277446747, 0.29650643467903137, 0.3333670198917389], "labels": ["image", "man", "shirt", "shirt", "man", "man", "man", "person", "shirt", "shirt"]}, {"id": "VS_chart_0_0_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_12_2_2", "boxes": [[6, 5, 1426, 1252], [57, 318, 80, 354], [131, 335, 1336, 346], [196, 204, 1350, 591], [134, 508, 1336, 519], [167, 158, 1368, 1022], [144, 679, 1347, 692], [174, 877, 1356, 925], [71, 192, 1385, 1201], [98, 1008, 123, 1045], [138, 1024, 1328, 1042]], "scores": [0.5369575023651123, 0.201702281832695, 0.2985374629497528, 0.21863201260566711, 0.2732413709163666, 0.39116305112838745, 0.2667810618877411, 0.22070656716823578, 0.28949394822120667, 0.29911670088768005, 0.25824370980262756], "labels": ["graph", "number", "line", "graph", "line", "graph", "line", "line", "graph", "number", "line"]}, {"id": "VS_table_2_6_3_2", "boxes": [[429, 257, 452, 319], [262, 259, 284, 315], [318, 260, 340, 314], [373, 260, 396, 314], [485, 267, 506, 308], [138, 140, 169, 856], [196, 67, 515, 954]], "scores": [0.2484678328037262, 0.2826550602912903, 0.24740952253341675, 0.20625779032707214, 0.318644255399704, 0.25321975350379944, 0.22125419974327087], "labels": ["number", "number", "number", "number", "number", "text", "text"]}, {"id": "VS_map_2_6_2_2", "boxes": [[-52, -2, 1634, 1645], [78, -2, 1694, 1480], [1106, 827, 1749, 911], [1106, 934, 1678, 1452]], "scores": [0.47148698568344116, 0.25282561779022217, 0.20258553326129913, 0.20691649615764618], "labels": ["map", "map", "line", "line"]}, {"id": "VD_illusion_1_5_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_4_0_1", "boxes": [[123, 114, 753, 596], [689, 525, 765, 598]], "scores": [0.26952481269836426, 0.4878329038619995], "labels": ["triangle", "square"]}, {"id": "VD_ocr_2_13_1_0", "boxes": [[291, 64, 461, 264], [113, 98, 365, 237], [381, 95, 634, 236], [62, 71, 690, 426], [69, 298, 696, 442]], "scores": [0.214250847697258, 0.41001370549201965, 0.43767285346984863, 0.4708695411682129, 0.3743131160736084], "labels": ["logo", "bull", "bull", "logo", "brand"]}, {"id": "VD_video_1_4_0_0", "boxes": [[22, 28, 731, 39], [155, 45, 312, 112], [527, 44, 688, 111], [157, 41, 332, 195], [176, 130, 333, 195], [10, 15, 735, 682], [21, 264, 730, 275], [150, 279, 306, 346], [522, 279, 682, 347], [294, 321, 433, 465], [20, 475, 733, 486], [149, 488, 700, 559], [367, 492, 525, 559], [152, 490, 308, 558], [525, 491, 684, 557], [400, 554, 502, 658]], "scores": [0.335815966129303, 0.40099167823791504, 0.4036642909049988, 0.20152871310710907, 0.40347015857696533, 0.33537837862968445, 0.3843245208263397, 0.3983616828918457, 0.40058520436286926, 0.37533196806907654, 0.3840416669845581, 0.23003080487251282, 0.39774665236473083, 0.3965839445590973, 0.3891085684299469, 0.21543021500110626], "labels": ["line", "car", "car", "car", "car", "graph", "line", "car", "car", "car", "line", "car", "car", "car", "car", "line"]}, {"id": "VS_chart_1_0_1_0", "boxes": [[2, 2, 973, 762], [337, 138, 794, 602], [330, 635, 350, 653]], "scores": [0.40619519352912903, 0.5031700134277344, 0.2003738284111023], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_0_13_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_6_3_3", "boxes": [[429, 257, 452, 319], [262, 259, 284, 315], [318, 260, 340, 314], [373, 260, 396, 314], [485, 267, 506, 308], [138, 140, 169, 856], [196, 67, 515, 954]], "scores": [0.2484678328037262, 0.2826550602912903, 0.24740952253341675, 0.20625779032707214, 0.318644255399704, 0.25321975350379944, 0.22125419974327087], "labels": ["number", "number", "number", "number", "number", "text", "text"]}, {"id": "VS_map_0_7_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_5_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_4_0_2", "boxes": [[123, 114, 753, 596], [689, 525, 765, 598]], "scores": [0.26952481269836426, 0.4878329038619995], "labels": ["triangle", "square"]}, {"id": "VD_ocr_2_13_1_1", "boxes": [[291, 64, 461, 264], [113, 98, 365, 237], [381, 95, 634, 236], [62, 71, 690, 426], [69, 298, 696, 442]], "scores": [0.214250847697258, 0.41001370549201965, 0.43767285346984863, 0.4708695411682129, 0.3743131160736084], "labels": ["logo", "bull", "bull", "logo", "brand"]}, {"id": "VD_video_1_4_0_1", "boxes": [[22, 28, 731, 39], [155, 45, 312, 112], [527, 44, 688, 111], [157, 41, 332, 195], [176, 130, 333, 195], [10, 15, 735, 682], [21, 264, 730, 275], [150, 279, 306, 346], [522, 279, 682, 347], [294, 321, 433, 465], [20, 475, 733, 486], [149, 488, 700, 559], [367, 492, 525, 559], [152, 490, 308, 558], [525, 491, 684, 557], [400, 554, 502, 658]], "scores": [0.335815966129303, 0.40099167823791504, 0.4036642909049988, 0.20152871310710907, 0.40347015857696533, 0.33537837862968445, 0.3843245208263397, 0.3983616828918457, 0.40058520436286926, 0.37533196806907654, 0.3840416669845581, 0.23003080487251282, 0.39774665236473083, 0.3965839445590973, 0.3891085684299469, 0.21543021500110626], "labels": ["line", "car", "car", "car", "car", "graph", "line", "car", "car", "car", "line", "car", "car", "car", "car", "line"]}, {"id": "VS_chart_1_0_1_1", "boxes": [[2, 2, 973, 762], [337, 138, 794, 602], [330, 635, 350, 653]], "scores": [0.40619519352912903, 0.5031700134277344, 0.2003738284111023], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_0_13_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_7_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_7_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_5_1_0", "boxes": [[138, 142, 408, 410]], "scores": [0.22728508710861206], "labels": ["pattern"]}, {"id": "VD_math_2_4_1_0", "boxes": [[125, 121, 755, 595]], "scores": [0.4339631199836731], "labels": ["triangle"]}, {"id": "VD_ocr_1_14_0_0", "boxes": [[286, 24, 343, 86], [70, 35, 256, 114], [4, 8, 372, 555], [220, 124, 295, 238], [66, 144, 155, 224], [198, 192, 248, 261], [300, 216, 379, 340], [301, 216, 378, 315], [3, 150, 373, 556], [1, 434, 109, 562], [158, 447, 312, 564], [96, 449, 187, 565]], "scores": [0.40300828218460083, 0.3894660174846649, 0.5884750485420227, 0.2229771614074707, 0.29324352741241455, 0.20772375166416168, 0.21402664482593536, 0.2356163114309311, 0.5092528462409973, 0.2778773605823517, 0.28660666942596436, 0.226152703166008], "labels": ["poster", "street sign", "animation film", "animal", "street sign", "animal", "animal", "animal", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_1_4_0_2", "boxes": [[22, 28, 731, 39], [155, 45, 312, 112], [527, 44, 688, 111], [157, 41, 332, 195], [176, 130, 333, 195], [10, 15, 735, 682], [21, 264, 730, 275], [150, 279, 306, 346], [522, 279, 682, 347], [294, 321, 433, 465], [20, 475, 733, 486], [149, 488, 700, 559], [367, 492, 525, 559], [152, 490, 308, 558], [525, 491, 684, 557], [400, 554, 502, 658]], "scores": [0.335815966129303, 0.40099167823791504, 0.4036642909049988, 0.20152871310710907, 0.40347015857696533, 0.33537837862968445, 0.3843245208263397, 0.3983616828918457, 0.40058520436286926, 0.37533196806907654, 0.3840416669845581, 0.23003080487251282, 0.39774665236473083, 0.3965839445590973, 0.3891085684299469, 0.21543021500110626], "labels": ["line", "car", "car", "car", "car", "graph", "line", "car", "car", "car", "line", "car", "car", "car", "car", "line"]}, {"id": "VS_chart_1_0_1_2", "boxes": [[2, 2, 973, 762], [337, 138, 794, 602], [330, 635, 350, 653]], "scores": [0.40619519352912903, 0.5031700134277344, 0.2003738284111023], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_1_13_1_0", "boxes": [[0, 0, 807, 672], [11, 118, 786, 128], [16, 114, 769, 480], [15, 217, 775, 226], [18, 267, 773, 275], [71, 134, 763, 467], [23, 364, 770, 373], [46, 460, 56, 472], [66, 466, 757, 477]], "scores": [0.4575601816177368, 0.24277347326278687, 0.3898736238479614, 0.253117173910141, 0.237295001745224, 0.35131216049194336, 0.29621511697769165, 0.2860274016857147, 0.3105889558792114], "labels": ["graph", "line", "graph", "line", "line", "graph", "line", "number", "line"]}, {"id": "VS_table_0_7_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_7_1_0", "boxes": [[0, 2, 439, 529], [327, 50, 409, 116], [26, 96, 414, 469], [404, 236, 414, 291], [379, 225, 418, 292], [104, 393, 228, 431], [269, 439, 312, 502], [279, 439, 303, 502]], "scores": [0.3351864516735077, 0.46209120750427246, 0.635481595993042, 0.20207685232162476, 0.2769911587238312, 0.287116676568985, 0.2189449965953827, 0.5046786665916443], "labels": ["map", "flag", "map", "landmark", "attraction", "cruise ship", "landmark", "landmark"]}, {"id": "VD_illusion_2_5_1_1", "boxes": [[138, 142, 408, 410]], "scores": [0.22728508710861206], "labels": ["pattern"]}, {"id": "VD_math_2_4_1_1", "boxes": [[125, 121, 755, 595]], "scores": [0.4339631199836731], "labels": ["triangle"]}, {"id": "VD_ocr_1_14_0_1", "boxes": [[286, 24, 343, 86], [70, 35, 256, 114], [4, 8, 372, 555], [220, 124, 295, 238], [66, 144, 155, 224], [198, 192, 248, 261], [300, 216, 379, 340], [301, 216, 378, 315], [3, 150, 373, 556], [1, 434, 109, 562], [158, 447, 312, 564], [96, 449, 187, 565]], "scores": [0.40300828218460083, 0.3894660174846649, 0.5884750485420227, 0.2229771614074707, 0.29324352741241455, 0.20772375166416168, 0.21402664482593536, 0.2356163114309311, 0.5092528462409973, 0.2778773605823517, 0.28660666942596436, 0.226152703166008], "labels": ["poster", "street sign", "animation film", "animal", "street sign", "animal", "animal", "animal", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_1_4_0_3", "boxes": [[22, 28, 731, 39], [155, 45, 312, 112], [527, 44, 688, 111], [157, 41, 332, 195], [176, 130, 333, 195], [10, 15, 735, 682], [21, 264, 730, 275], [150, 279, 306, 346], [522, 279, 682, 347], [294, 321, 433, 465], [20, 475, 733, 486], [149, 488, 700, 559], [367, 492, 525, 559], [152, 490, 308, 558], [525, 491, 684, 557], [400, 554, 502, 658]], "scores": [0.335815966129303, 0.40099167823791504, 0.4036642909049988, 0.20152871310710907, 0.40347015857696533, 0.33537837862968445, 0.3843245208263397, 0.3983616828918457, 0.40058520436286926, 0.37533196806907654, 0.3840416669845581, 0.23003080487251282, 0.39774665236473083, 0.3965839445590973, 0.3891085684299469, 0.21543021500110626], "labels": ["line", "car", "car", "car", "car", "graph", "line", "car", "car", "car", "line", "car", "car", "car", "car", "line"]}, {"id": "VS_chart_1_0_1_3", "boxes": [[2, 2, 973, 762], [337, 138, 794, 602], [330, 635, 350, 653]], "scores": [0.40619519352912903, 0.5031700134277344, 0.2003738284111023], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_1_13_1_1", "boxes": [[0, 0, 807, 672], [11, 118, 786, 128], [16, 114, 769, 480], [15, 217, 775, 226], [18, 267, 773, 275], [71, 134, 763, 467], [23, 364, 770, 373], [46, 460, 56, 472], [66, 466, 757, 477]], "scores": [0.4575601816177368, 0.24277347326278687, 0.3898736238479614, 0.253117173910141, 0.237295001745224, 0.35131216049194336, 0.29621511697769165, 0.2860274016857147, 0.3105889558792114], "labels": ["graph", "line", "graph", "line", "line", "graph", "line", "number", "line"]}, {"id": "VS_table_0_7_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_7_1_1", "boxes": [[0, 2, 439, 529], [327, 50, 409, 116], [26, 96, 414, 469], [404, 236, 414, 291], [379, 225, 418, 292], [104, 393, 228, 431], [269, 439, 312, 502], [279, 439, 303, 502]], "scores": [0.3351864516735077, 0.46209120750427246, 0.635481595993042, 0.20207685232162476, 0.2769911587238312, 0.287116676568985, 0.2189449965953827, 0.5046786665916443], "labels": ["map", "flag", "map", "landmark", "attraction", "cruise ship", "landmark", "landmark"]}, {"id": "VD_illusion_2_5_1_2", "boxes": [[138, 142, 408, 410]], "scores": [0.22728508710861206], "labels": ["pattern"]}, {"id": "VD_math_2_4_1_2", "boxes": [[125, 121, 755, 595]], "scores": [0.4339631199836731], "labels": ["triangle"]}, {"id": "VD_ocr_2_14_1_0", "boxes": [[298, 23, 358, 88], [72, 36, 267, 116], [312, 30, 339, 83], [5, 5, 389, 574], [69, 148, 163, 233], [230, 127, 315, 246], [206, 198, 258, 268], [316, 224, 395, 323], [313, 222, 395, 357], [228, 274, 393, 570], [4, 164, 389, 578], [100, 385, 173, 495], [0, 446, 117, 581], [164, 464, 326, 583], [98, 465, 196, 584]], "scores": [0.3413538336753845, 0.4071471095085144, 0.2104974091053009, 0.5900241136550903, 0.25546014308929443, 0.22513921558856964, 0.22894836962223053, 0.20456738770008087, 0.20659229159355164, 0.2013157457113266, 0.4767121374607086, 0.2006596326828003, 0.2760276794433594, 0.28455957770347595, 0.2309591919183731], "labels": ["poster", "street sign", "animation film", "animation film", "street sign", "animal", "animal", "animal", "animal", "animation film", "animation film", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_2_4_1_0", "boxes": [[23, 24, 734, 34], [146, 38, 699, 108], [368, 40, 525, 106], [151, 40, 310, 107], [525, 40, 685, 107], [10, 18, 734, 680], [19, 235, 727, 246], [21, 234, 729, 244], [150, 250, 308, 318], [522, 250, 683, 317], [295, 292, 434, 436], [23, 451, 726, 462], [156, 467, 310, 534], [528, 466, 688, 533], [159, 460, 334, 619], [178, 550, 335, 616]], "scores": [0.29180893301963806, 0.2559487819671631, 0.361890584230423, 0.38158339262008667, 0.36007052659988403, 0.31330162286758423, 0.3543889820575714, 0.2659742832183838, 0.39988863468170166, 0.4010797441005707, 0.3809375762939453, 0.395605206489563, 0.3937763571739197, 0.40377411246299744, 0.24007698893547058, 0.38378146290779114], "labels": ["line", "parking", "car", "car", "car", "graph", "line", "line", "car", "car", "car", "line", "car", "car", "parking", "car"]}, {"id": "VS_chart_0_1_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_13_2_0", "boxes": [[2, -1, 807, 670], [11, 118, 763, 128], [160, 109, 166, 453], [19, 111, 759, 475], [14, 217, 764, 226], [19, 267, 760, 275], [74, 133, 746, 466], [23, 365, 755, 373], [46, 460, 56, 472], [66, 466, 747, 478]], "scores": [0.4543183147907257, 0.22139514982700348, 0.21133312582969666, 0.25504952669143677, 0.2631392180919647, 0.23939380049705505, 0.3603525459766388, 0.2950989902019501, 0.2891324460506439, 0.32551702857017517], "labels": ["graph", "line", "line", "graph", "line", "line", "graph", "line", "number", "line"]}, {"id": "VS_table_0_7_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_7_2_0", "boxes": [[0, 2, 439, 529], [327, 50, 409, 116], [26, 96, 414, 470], [405, 236, 414, 291], [379, 225, 418, 292], [104, 393, 228, 431], [269, 439, 312, 502], [279, 439, 303, 502]], "scores": [0.33817732334136963, 0.4614129960536957, 0.6335359215736389, 0.20238028466701508, 0.2768097519874573, 0.2870439887046814, 0.2549467384815216, 0.48703187704086304], "labels": ["map", "flag", "map", "landmark", "attraction", "cruise ship", "landmark", "landmark"]}, {"id": "VD_illusion_1_6_0_0", "boxes": [[177, 5, 968, 817]], "scores": [0.2150595784187317], "labels": ["qr code"]}, {"id": "VD_math_1_5_0_0", "boxes": [[248, 26, 274, 58], [0, 2, 577, 482], [87, 74, 500, 387]], "scores": [0.2648198902606964, 0.21328282356262207, 0.7959225177764893], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_2_14_1_1", "boxes": [[298, 23, 358, 88], [72, 36, 267, 116], [312, 30, 339, 83], [5, 5, 389, 574], [69, 148, 163, 233], [230, 127, 315, 246], [206, 198, 258, 268], [316, 224, 395, 323], [313, 222, 395, 357], [228, 274, 393, 570], [4, 164, 389, 578], [100, 385, 173, 495], [0, 446, 117, 581], [164, 464, 326, 583], [98, 465, 196, 584]], "scores": [0.3413538336753845, 0.4071471095085144, 0.2104974091053009, 0.5900241136550903, 0.25546014308929443, 0.22513921558856964, 0.22894836962223053, 0.20456738770008087, 0.20659229159355164, 0.2013157457113266, 0.4767121374607086, 0.2006596326828003, 0.2760276794433594, 0.28455957770347595, 0.2309591919183731], "labels": ["poster", "street sign", "animation film", "animation film", "street sign", "animal", "animal", "animal", "animal", "animation film", "animation film", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_2_4_1_1", "boxes": [[23, 24, 734, 34], [146, 38, 699, 108], [368, 40, 525, 106], [151, 40, 310, 107], [525, 40, 685, 107], [10, 18, 734, 680], [19, 235, 727, 246], [21, 234, 729, 244], [150, 250, 308, 318], [522, 250, 683, 317], [295, 292, 434, 436], [23, 451, 726, 462], [156, 467, 310, 534], [528, 466, 688, 533], [159, 460, 334, 619], [178, 550, 335, 616]], "scores": [0.29180893301963806, 0.2559487819671631, 0.361890584230423, 0.38158339262008667, 0.36007052659988403, 0.31330162286758423, 0.3543889820575714, 0.2659742832183838, 0.39988863468170166, 0.4010797441005707, 0.3809375762939453, 0.395605206489563, 0.3937763571739197, 0.40377411246299744, 0.24007698893547058, 0.38378146290779114], "labels": ["line", "parking", "car", "car", "car", "graph", "line", "line", "car", "car", "car", "line", "car", "car", "parking", "car"]}, {"id": "VS_chart_0_1_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_13_2_1", "boxes": [[2, -1, 807, 670], [11, 118, 763, 128], [160, 109, 166, 453], [19, 111, 759, 475], [14, 217, 764, 226], [19, 267, 760, 275], [74, 133, 746, 466], [23, 365, 755, 373], [46, 460, 56, 472], [66, 466, 747, 478]], "scores": [0.4543183147907257, 0.22139514982700348, 0.21133312582969666, 0.25504952669143677, 0.2631392180919647, 0.23939380049705505, 0.3603525459766388, 0.2950989902019501, 0.2891324460506439, 0.32551702857017517], "labels": ["graph", "line", "line", "graph", "line", "line", "graph", "line", "number", "line"]}, {"id": "VS_table_0_7_0_4", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_7_2_1", "boxes": [[0, 2, 439, 529], [327, 50, 409, 116], [26, 96, 414, 470], [405, 236, 414, 291], [379, 225, 418, 292], [104, 393, 228, 431], [269, 439, 312, 502], [279, 439, 303, 502]], "scores": [0.33817732334136963, 0.4614129960536957, 0.6335359215736389, 0.20238028466701508, 0.2768097519874573, 0.2870439887046814, 0.2549467384815216, 0.48703187704086304], "labels": ["map", "flag", "map", "landmark", "attraction", "cruise ship", "landmark", "landmark"]}, {"id": "VD_illusion_2_6_1_0", "boxes": [[235, 21, 954, 822]], "scores": [0.23776891827583313], "labels": ["qr code"]}, {"id": "VD_math_1_5_0_1", "boxes": [[248, 26, 274, 58], [0, 2, 577, 482], [87, 74, 500, 387]], "scores": [0.2648198902606964, 0.21328282356262207, 0.7959225177764893], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_1_15_0_0", "boxes": [[0, 0, 218, 32], [0, 2, 216, 222], [187, 62, 200, 74], [169, 84, 197, 114], [15, 30, 202, 219], [122, 117, 153, 146], [45, 126, 59, 139], [43, 113, 110, 179], [161, 134, 172, 146], [95, 128, 126, 161], [95, 117, 165, 173], [169, 144, 176, 150], [130, 140, 162, 173], [44, 114, 172, 178]], "scores": [0.3055999279022217, 0.363934725522995, 0.25071266293525696, 0.4125184118747711, 0.28905901312828064, 0.46672308444976807, 0.44137871265411377, 0.3751760721206665, 0.41015398502349854, 0.5210679769515991, 0.4716718792915344, 0.32219651341438293, 0.4866407513618469, 0.248648539185524], "labels": ["lid", "yoghurt", "chocolate", "chocolate", "yoghurt", "chocolate", "chocolate", "vanilla", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate"]}, {"id": "VD_video_2_4_1_2", "boxes": [[23, 24, 734, 34], [146, 38, 699, 108], [368, 40, 525, 106], [151, 40, 310, 107], [525, 40, 685, 107], [10, 18, 734, 680], [19, 235, 727, 246], [21, 234, 729, 244], [150, 250, 308, 318], [522, 250, 683, 317], [295, 292, 434, 436], [23, 451, 726, 462], [156, 467, 310, 534], [528, 466, 688, 533], [159, 460, 334, 619], [178, 550, 335, 616]], "scores": [0.29180893301963806, 0.2559487819671631, 0.361890584230423, 0.38158339262008667, 0.36007052659988403, 0.31330162286758423, 0.3543889820575714, 0.2659742832183838, 0.39988863468170166, 0.4010797441005707, 0.3809375762939453, 0.395605206489563, 0.3937763571739197, 0.40377411246299744, 0.24007698893547058, 0.38378146290779114], "labels": ["line", "parking", "car", "car", "car", "graph", "line", "line", "car", "car", "car", "line", "car", "car", "parking", "car"]}, {"id": "VS_chart_0_1_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_14_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_7_0_5", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_8_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_7_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_5_0_2", "boxes": [[248, 26, 274, 58], [0, 2, 577, 482], [87, 74, 500, 387]], "scores": [0.2648198902606964, 0.21328282356262207, 0.7959225177764893], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_1_15_0_1", "boxes": [[0, 0, 218, 32], [0, 2, 216, 222], [187, 62, 200, 74], [169, 84, 197, 114], [15, 30, 202, 219], [122, 117, 153, 146], [45, 126, 59, 139], [43, 113, 110, 179], [161, 134, 172, 146], [95, 128, 126, 161], [95, 117, 165, 173], [169, 144, 176, 150], [130, 140, 162, 173], [44, 114, 172, 178]], "scores": [0.3055999279022217, 0.363934725522995, 0.25071266293525696, 0.4125184118747711, 0.28905901312828064, 0.46672308444976807, 0.44137871265411377, 0.3751760721206665, 0.41015398502349854, 0.5210679769515991, 0.4716718792915344, 0.32219651341438293, 0.4866407513618469, 0.248648539185524], "labels": ["lid", "yoghurt", "chocolate", "chocolate", "yoghurt", "chocolate", "chocolate", "vanilla", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate"]}, {"id": "VD_video_2_4_1_3", "boxes": [[23, 24, 734, 34], [146, 38, 699, 108], [368, 40, 525, 106], [151, 40, 310, 107], [525, 40, 685, 107], [10, 18, 734, 680], [19, 235, 727, 246], [21, 234, 729, 244], [150, 250, 308, 318], [522, 250, 683, 317], [295, 292, 434, 436], [23, 451, 726, 462], [156, 467, 310, 534], [528, 466, 688, 533], [159, 460, 334, 619], [178, 550, 335, 616]], "scores": [0.29180893301963806, 0.2559487819671631, 0.361890584230423, 0.38158339262008667, 0.36007052659988403, 0.31330162286758423, 0.3543889820575714, 0.2659742832183838, 0.39988863468170166, 0.4010797441005707, 0.3809375762939453, 0.395605206489563, 0.3937763571739197, 0.40377411246299744, 0.24007698893547058, 0.38378146290779114], "labels": ["line", "parking", "car", "car", "car", "graph", "line", "line", "car", "car", "car", "line", "car", "car", "parking", "car"]}, {"id": "VS_chart_0_1_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_14_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_7_1_0", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_0_8_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_7_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_5_1_0", "boxes": [[298, 16, 330, 55], [0, 0, 579, 424], [54, 63, 538, 337]], "scores": [0.34967243671417236, 0.23368984460830688, 0.763053297996521], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_2_15_1_0", "boxes": [[25, 34, 254, 70], [24, 35, 254, 266], [220, 99, 235, 113], [200, 99, 236, 155], [203, 123, 231, 154], [39, 61, 239, 262], [154, 157, 186, 187], [74, 167, 89, 181], [74, 155, 205, 213], [72, 151, 141, 212], [125, 169, 159, 203], [194, 174, 206, 187], [203, 185, 211, 192], [126, 157, 197, 211], [163, 181, 196, 211]], "scores": [0.37767648696899414, 0.39290234446525574, 0.20276737213134766, 0.26881980895996094, 0.3052850663661957, 0.26315295696258545, 0.4874594509601593, 0.45596417784690857, 0.22858084738254547, 0.4667540192604065, 0.5425271987915039, 0.35415413975715637, 0.303877592086792, 0.43845459818840027, 0.4956000745296478], "labels": ["lid", "yoghurt", "chocolate bar", "chocolate", "chocolate", "milk", "chocolate", "chocolate", "chocolate", "vanilla", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate"]}, {"id": "VD_video_2_4_2_0", "boxes": [[21, 27, 729, 37], [154, 44, 310, 111], [525, 43, 686, 111], [154, 41, 328, 193], [175, 129, 332, 195], [8, 13, 732, 681], [18, 255, 734, 266], [365, 272, 524, 338], [522, 271, 683, 338], [149, 271, 307, 339], [142, 270, 698, 338], [394, 331, 502, 439], [22, 466, 724, 477], [148, 481, 304, 548], [520, 481, 680, 548], [292, 521, 431, 667]], "scores": [0.33111414313316345, 0.39897024631500244, 0.39935970306396484, 0.255850225687027, 0.4006105959415436, 0.28316766023635864, 0.37138631939888, 0.40257975459098816, 0.3915860950946808, 0.4021136164665222, 0.2929970920085907, 0.22561146318912506, 0.40097302198410034, 0.3965117037296295, 0.4017775058746338, 0.38066789507865906], "labels": ["line", "car", "car", "parking", "car", "graph", "line", "car", "car", "car", "parking", "line", "line", "car", "car", "vehicle"]}, {"id": "VS_chart_0_1_0_4", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_14_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_7_1_1", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_0_8_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_7_1_0", "boxes": [[302, 129, 1312, 554], [409, 328, 1224, 554]], "scores": [0.20677918195724487, 0.25560474395751953], "labels": ["curve", "curve"]}, {"id": "VD_math_2_5_1_1", "boxes": [[298, 16, 330, 55], [0, 0, 579, 424], [54, 63, 538, 337]], "scores": [0.34967243671417236, 0.23368984460830688, 0.763053297996521], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_2_15_1_1", "boxes": [[25, 34, 254, 70], [24, 35, 254, 266], [220, 99, 235, 113], [200, 99, 236, 155], [203, 123, 231, 154], [39, 61, 239, 262], [154, 157, 186, 187], [74, 167, 89, 181], [74, 155, 205, 213], [72, 151, 141, 212], [125, 169, 159, 203], [194, 174, 206, 187], [203, 185, 211, 192], [126, 157, 197, 211], [163, 181, 196, 211]], "scores": [0.37767648696899414, 0.39290234446525574, 0.20276737213134766, 0.26881980895996094, 0.3052850663661957, 0.26315295696258545, 0.4874594509601593, 0.45596417784690857, 0.22858084738254547, 0.4667540192604065, 0.5425271987915039, 0.35415413975715637, 0.303877592086792, 0.43845459818840027, 0.4956000745296478], "labels": ["lid", "yoghurt", "chocolate bar", "chocolate", "chocolate", "milk", "chocolate", "chocolate", "chocolate", "vanilla", "chocolate", "chocolate", "chocolate", "chocolate", "chocolate"]}, {"id": "VD_video_2_4_2_1", "boxes": [[21, 27, 729, 37], [154, 44, 310, 111], [525, 43, 686, 111], [154, 41, 328, 193], [175, 129, 332, 195], [8, 13, 732, 681], [18, 255, 734, 266], [365, 272, 524, 338], [522, 271, 683, 338], [149, 271, 307, 339], [142, 270, 698, 338], [394, 331, 502, 439], [22, 466, 724, 477], [148, 481, 304, 548], [520, 481, 680, 548], [292, 521, 431, 667]], "scores": [0.33111414313316345, 0.39897024631500244, 0.39935970306396484, 0.255850225687027, 0.4006105959415436, 0.28316766023635864, 0.37138631939888, 0.40257975459098816, 0.3915860950946808, 0.4021136164665222, 0.2929970920085907, 0.22561146318912506, 0.40097302198410034, 0.3965117037296295, 0.4017775058746338, 0.38066789507865906], "labels": ["line", "car", "car", "parking", "car", "graph", "line", "car", "car", "car", "parking", "line", "line", "car", "car", "vehicle"]}, {"id": "VS_chart_0_1_0_5", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_14_1_0", "boxes": [[470, 574, 1218, 591], [1304, 564, 1864, 1124], [1690, 785, 1710, 811], [1557, 972, 1576, 999], [1579, 972, 1597, 998]], "scores": [0.24160586297512054, 0.4251479506492615, 0.25044918060302734, 0.24055883288383484, 0.22889229655265808], "labels": ["line", "circle", "number", "number", "number"]}, {"id": "VS_table_1_7_1_2", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_1_8_1_0", "boxes": [[2, 40, 1266, 709], [0, -5, 1287, 749]], "scores": [0.7894122004508972, 0.2506192624568939], "labels": ["map", "map"]}, {"id": "VD_illusion_2_7_1_1", "boxes": [[302, 129, 1312, 554], [409, 328, 1224, 554]], "scores": [0.20677918195724487, 0.25560474395751953], "labels": ["curve", "curve"]}, {"id": "VD_math_2_5_1_2", "boxes": [[298, 16, 330, 55], [0, 0, 579, 424], [54, 63, 538, 337]], "scores": [0.34967243671417236, 0.23368984460830688, 0.763053297996521], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_ocr_1_16_0_0", "boxes": [[43, 13, 305, 161], [168, 33, 295, 159], [46, 19, 303, 352], [52, 85, 295, 350], [118, 208, 235, 291], [191, 246, 226, 289], [220, 251, 244, 282]], "scores": [0.486880362033844, 0.20590202510356903, 0.3647608160972595, 0.4365192651748657, 0.4110172986984253, 0.24434730410575867, 0.23080100119113922], "labels": ["icecream", "icecream", "icecream", "cup", "icecream", "scoop", "peanut butter"]}, {"id": "VD_video_2_4_2_2", "boxes": [[21, 27, 729, 37], [154, 44, 310, 111], [525, 43, 686, 111], [154, 41, 328, 193], [175, 129, 332, 195], [8, 13, 732, 681], [18, 255, 734, 266], [365, 272, 524, 338], [522, 271, 683, 338], [149, 271, 307, 339], [142, 270, 698, 338], [394, 331, 502, 439], [22, 466, 724, 477], [148, 481, 304, 548], [520, 481, 680, 548], [292, 521, 431, 667]], "scores": [0.33111414313316345, 0.39897024631500244, 0.39935970306396484, 0.255850225687027, 0.4006105959415436, 0.28316766023635864, 0.37138631939888, 0.40257975459098816, 0.3915860950946808, 0.4021136164665222, 0.2929970920085907, 0.22561146318912506, 0.40097302198410034, 0.3965117037296295, 0.4017775058746338, 0.38066789507865906], "labels": ["line", "car", "car", "parking", "car", "graph", "line", "car", "car", "car", "parking", "line", "line", "car", "car", "vehicle"]}, {"id": "VS_chart_0_1_0_6", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_14_1_1", "boxes": [[470, 574, 1218, 591], [1304, 564, 1864, 1124], [1690, 785, 1710, 811], [1557, 972, 1576, 999], [1579, 972, 1597, 998]], "scores": [0.24160586297512054, 0.4251479506492615, 0.25044918060302734, 0.24055883288383484, 0.22889229655265808], "labels": ["line", "circle", "number", "number", "number"]}, {"id": "VS_table_1_7_1_3", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_1_8_1_1", "boxes": [[2, 40, 1266, 709], [0, -5, 1287, 749]], "scores": [0.7894122004508972, 0.2506192624568939], "labels": ["map", "map"]}, {"id": "VD_illusion_1_8_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_6_0_0", "boxes": [[4, 13, 833, 654], [120, 35, 718, 631]], "scores": [0.28878849744796753, 0.8242736458778381], "labels": ["shape", "shape"]}, {"id": "VD_ocr_1_16_0_1", "boxes": [[43, 13, 305, 161], [168, 33, 295, 159], [46, 19, 303, 352], [52, 85, 295, 350], [118, 208, 235, 291], [191, 246, 226, 289], [220, 251, 244, 282]], "scores": [0.486880362033844, 0.20590202510356903, 0.3647608160972595, 0.4365192651748657, 0.4110172986984253, 0.24434730410575867, 0.23080100119113922], "labels": ["icecream", "icecream", "icecream", "cup", "icecream", "scoop", "peanut butter"]}, {"id": "VD_video_2_4_2_3", "boxes": [[21, 27, 729, 37], [154, 44, 310, 111], [525, 43, 686, 111], [154, 41, 328, 193], [175, 129, 332, 195], [8, 13, 732, 681], [18, 255, 734, 266], [365, 272, 524, 338], [522, 271, 683, 338], [149, 271, 307, 339], [142, 270, 698, 338], [394, 331, 502, 439], [22, 466, 724, 477], [148, 481, 304, 548], [520, 481, 680, 548], [292, 521, 431, 667]], "scores": [0.33111414313316345, 0.39897024631500244, 0.39935970306396484, 0.255850225687027, 0.4006105959415436, 0.28316766023635864, 0.37138631939888, 0.40257975459098816, 0.3915860950946808, 0.4021136164665222, 0.2929970920085907, 0.22561146318912506, 0.40097302198410034, 0.3965117037296295, 0.4017775058746338, 0.38066789507865906], "labels": ["line", "car", "car", "parking", "car", "graph", "line", "car", "car", "car", "parking", "line", "line", "car", "car", "vehicle"]}, {"id": "VS_chart_0_1_0_7", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_14_1_2", "boxes": [[470, 574, 1218, 591], [1304, 564, 1864, 1124], [1690, 785, 1710, 811], [1557, 972, 1576, 999], [1579, 972, 1597, 998]], "scores": [0.24160586297512054, 0.4251479506492615, 0.25044918060302734, 0.24055883288383484, 0.22889229655265808], "labels": ["line", "circle", "number", "number", "number"]}, {"id": "VS_table_1_7_1_4", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_1_8_1_2", "boxes": [[2, 40, 1266, 709], [0, -5, 1287, 749]], "scores": [0.7894122004508972, 0.2506192624568939], "labels": ["map", "map"]}, {"id": "VD_illusion_1_8_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_6_0_1", "boxes": [[4, 13, 833, 654], [120, 35, 718, 631]], "scores": [0.28878849744796753, 0.8242736458778381], "labels": ["shape", "shape"]}, {"id": "VD_ocr_2_16_1_0", "boxes": [[31, 16, 301, 167], [34, 19, 300, 362], [44, 109, 290, 363], [108, 217, 233, 297]], "scores": [0.5115664601325989, 0.38438108563423157, 0.439646452665329, 0.4568607211112976], "labels": ["icecream", "icecream", "cup", "icecream"]}, {"id": "VD_video_1_5_0_0", "boxes": [[1338, 6, 1578, 553], [6, 2, 1718, 575], [662, 2, 1057, 550], [137, 14, 430, 554], [766, 5, 1027, 553], [246, 332, 394, 545], [1371, 395, 1478, 547], [809, 374, 961, 546]], "scores": [0.5115486979484558, 0.2030431628227234, 0.5379964709281921, 0.5533844232559204, 0.3412485718727112, 0.5184046626091003, 0.38962435722351074, 0.4778502583503723], "labels": ["child", "photo", "child", "child", "child", "jeans", "jeans", "jeans"]}, {"id": "VS_chart_1_1_1_0", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_2_14_2_0", "boxes": [[-34, 13, 1828, 1281], [580, 71, 596, 95], [465, 70, 484, 95], [216, 71, 1955, 1165], [1297, 557, 1874, 1128], [465, 1129, 484, 1154], [577, 1129, 596, 1154]], "scores": [0.3566090762615204, 0.216477170586586, 0.2467927485704422, 0.4424324035644531, 0.3887743651866913, 0.2487836331129074, 0.2073095589876175], "labels": ["graph", "number", "number", "graph", "graph", "number", "number"]}, {"id": "VS_table_1_7_1_5", "boxes": [[363, 517, 380, 541], [573, 517, 590, 541], [784, 517, 801, 541]], "scores": [0.20055559277534485, 0.23014028370380402, 0.2260674685239792], "labels": ["number", "number", "number"]}, {"id": "VS_map_2_8_2_0", "boxes": [[186, 142, 2753, 1612], [-64, 232, 2589, 1636], [1457, 116, 2794, 1604], [-20, -28, 2735, 1697]], "scores": [0.419409841299057, 0.20032469928264618, 0.5844330191612244, 0.42787256836891174], "labels": ["map", "area", "map", "area"]}, {"id": "VD_illusion_1_8_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_6_0_2", "boxes": [[4, 13, 833, 654], [120, 35, 718, 631]], "scores": [0.28878849744796753, 0.8242736458778381], "labels": ["shape", "shape"]}, {"id": "VD_ocr_2_16_1_1", "boxes": [[31, 16, 301, 167], [34, 19, 300, 362], [44, 109, 290, 363], [108, 217, 233, 297]], "scores": [0.5115664601325989, 0.38438108563423157, 0.439646452665329, 0.4568607211112976], "labels": ["icecream", "icecream", "cup", "icecream"]}, {"id": "VD_video_1_5_0_1", "boxes": [[1338, 6, 1578, 553], [6, 2, 1718, 575], [662, 2, 1057, 550], [137, 14, 430, 554], [766, 5, 1027, 553], [246, 332, 394, 545], [1371, 395, 1478, 547], [809, 374, 961, 546]], "scores": [0.5115486979484558, 0.2030431628227234, 0.5379964709281921, 0.5533844232559204, 0.3412485718727112, 0.5184046626091003, 0.38962435722351074, 0.4778502583503723], "labels": ["child", "photo", "child", "child", "child", "jeans", "jeans", "jeans"]}, {"id": "VS_chart_1_1_1_1", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_2_14_2_1", "boxes": [[-34, 13, 1828, 1281], [580, 71, 596, 95], [465, 70, 484, 95], [216, 71, 1955, 1165], [1297, 557, 1874, 1128], [465, 1129, 484, 1154], [577, 1129, 596, 1154]], "scores": [0.3566090762615204, 0.216477170586586, 0.2467927485704422, 0.4424324035644531, 0.3887743651866913, 0.2487836331129074, 0.2073095589876175], "labels": ["graph", "number", "number", "graph", "graph", "number", "number"]}, {"id": "VS_table_2_7_2_0", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_2_8_2_1", "boxes": [[186, 142, 2753, 1612], [-64, 232, 2589, 1636], [1457, 116, 2794, 1604], [-20, -28, 2735, 1697]], "scores": [0.419409841299057, 0.20032469928264618, 0.5844330191612244, 0.42787256836891174], "labels": ["map", "area", "map", "area"]}, {"id": "VD_illusion_2_8_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_6_1_0", "boxes": [[1, 0, 704, 687], [134, 72, 397, 434], [53, 70, 686, 662], [15, 510, 708, 535]], "scores": [0.308248370885849, 0.22482998669147491, 0.7382161617279053, 0.20280712842941284], "labels": ["shape", "shape", "shape", "line"]}, {"id": "VD_ocr_1_17_0_0", "boxes": [[-2, 4, 1062, 734], [292, 82, 804, 742], [296, 311, 805, 741]], "scores": [0.2012302279472351, 0.5766072273254395, 0.231394961476326], "labels": ["cinema", "detective", "business suit"]}, {"id": "VD_video_1_5_0_2", "boxes": [[1338, 6, 1578, 553], [6, 2, 1718, 575], [662, 2, 1057, 550], [137, 14, 430, 554], [766, 5, 1027, 553], [246, 332, 394, 545], [1371, 395, 1478, 547], [809, 374, 961, 546]], "scores": [0.5115486979484558, 0.2030431628227234, 0.5379964709281921, 0.5533844232559204, 0.3412485718727112, 0.5184046626091003, 0.38962435722351074, 0.4778502583503723], "labels": ["child", "photo", "child", "child", "child", "jeans", "jeans", "jeans"]}, {"id": "VS_chart_1_1_1_2", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_2_14_2_2", "boxes": [[-34, 13, 1828, 1281], [580, 71, 596, 95], [465, 70, 484, 95], [216, 71, 1955, 1165], [1297, 557, 1874, 1128], [465, 1129, 484, 1154], [577, 1129, 596, 1154]], "scores": [0.3566090762615204, 0.216477170586586, 0.2467927485704422, 0.4424324035644531, 0.3887743651866913, 0.2487836331129074, 0.2073095589876175], "labels": ["graph", "number", "number", "graph", "graph", "number", "number"]}, {"id": "VS_table_2_7_2_1", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_2_8_2_2", "boxes": [[186, 142, 2753, 1612], [-64, 232, 2589, 1636], [1457, 116, 2794, 1604], [-20, -28, 2735, 1697]], "scores": [0.419409841299057, 0.20032469928264618, 0.5844330191612244, 0.42787256836891174], "labels": ["map", "area", "map", "area"]}, {"id": "VD_illusion_2_8_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_6_1_1", "boxes": [[1, 0, 704, 687], [134, 72, 397, 434], [53, 70, 686, 662], [15, 510, 708, 535]], "scores": [0.308248370885849, 0.22482998669147491, 0.7382161617279053, 0.20280712842941284], "labels": ["shape", "shape", "shape", "line"]}, {"id": "VD_ocr_1_17_0_1", "boxes": [[-2, 4, 1062, 734], [292, 82, 804, 742], [296, 311, 805, 741]], "scores": [0.2012302279472351, 0.5766072273254395, 0.231394961476326], "labels": ["cinema", "detective", "business suit"]}, {"id": "VD_video_1_5_0_3", "boxes": [[1338, 6, 1578, 553], [6, 2, 1718, 575], [662, 2, 1057, 550], [137, 14, 430, 554], [766, 5, 1027, 553], [246, 332, 394, 545], [1371, 395, 1478, 547], [809, 374, 961, 546]], "scores": [0.5115486979484558, 0.2030431628227234, 0.5379964709281921, 0.5533844232559204, 0.3412485718727112, 0.5184046626091003, 0.38962435722351074, 0.4778502583503723], "labels": ["child", "photo", "child", "child", "child", "jeans", "jeans", "jeans"]}, {"id": "VS_chart_1_1_1_3", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_0_15_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_7_2_2", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_0_9_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_8_1_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_6_1_2", "boxes": [[1, 0, 704, 687], [134, 72, 397, 434], [53, 70, 686, 662], [15, 510, 708, 535]], "scores": [0.308248370885849, 0.22482998669147491, 0.7382161617279053, 0.20280712842941284], "labels": ["shape", "shape", "shape", "line"]}, {"id": "VD_ocr_2_17_1_0", "boxes": [[2, 11, 628, 468], [93, 62, 326, 213], [382, 73, 529, 159], [3, 61, 635, 421], [41, 67, 376, 417], [310, 73, 613, 417]], "scores": [0.3224612772464752, 0.4430542290210724, 0.4085186719894409, 0.22794577479362488, 0.2270522266626358, 0.21940995752811432], "labels": ["cinema", "cowboy hat", "cowboy hat", "cowboy hat", "woman", "man"]}, {"id": "VD_video_2_5_1_0", "boxes": [[774, 8, 1025, 562], [1335, 20, 1611, 560], [173, 8, 411, 562], [666, 7, 1062, 560], [1429, 346, 1577, 554], [199, 403, 315, 556], [813, 383, 969, 554], [5, 7, 1736, 578]], "scores": [0.21150405704975128, 0.5106663703918457, 0.5478421449661255, 0.5599082708358765, 0.4757132828235626, 0.4065094292163849, 0.47630974650382996, 0.2121441662311554], "labels": ["child", "child", "child", "child", "jeans", "jeans", "jeans", "photo"]}, {"id": "VS_chart_1_1_1_4", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_0_15_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_7_2_3", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_0_9_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_9_0_0", "boxes": [[24, 10, 786, 971], [283, 723, 566, 977]], "scores": [0.9254916310310364, 0.22013528645038605], "labels": ["shape", "triangle"]}, {"id": "VD_math_1_7_0_0", "boxes": [[1, 1, 495, 495], [166, 2, 479, 321], [311, 149, 333, 176], [29, 6, 478, 328], [29, 7, 479, 485], [29, 187, 171, 322], [172, 188, 340, 323], [172, 286, 202, 318], [29, 187, 346, 487], [173, 316, 340, 484]], "scores": [0.2974092364311218, 0.6903642416000366, 0.2331201285123825, 0.22238896787166595, 0.5276613831520081, 0.43903011083602905, 0.30950576066970825, 0.2921477258205414, 0.23506124317646027, 0.48260927200317383], "labels": ["shape", "shape", "triangle", "shape", "shape", "square", "shape", "square", "shape", "square"]}, {"id": "VD_ocr_2_17_1_1", "boxes": [[2, 11, 628, 468], [93, 62, 326, 213], [382, 73, 529, 159], [3, 61, 635, 421], [41, 67, 376, 417], [310, 73, 613, 417]], "scores": [0.3224612772464752, 0.4430542290210724, 0.4085186719894409, 0.22794577479362488, 0.2270522266626358, 0.21940995752811432], "labels": ["cinema", "cowboy hat", "cowboy hat", "cowboy hat", "woman", "man"]}, {"id": "VD_video_2_5_1_1", "boxes": [[774, 8, 1025, 562], [1335, 20, 1611, 560], [173, 8, 411, 562], [666, 7, 1062, 560], [1429, 346, 1577, 554], [199, 403, 315, 556], [813, 383, 969, 554], [5, 7, 1736, 578]], "scores": [0.21150405704975128, 0.5106663703918457, 0.5478421449661255, 0.5599082708358765, 0.4757132828235626, 0.4065094292163849, 0.47630974650382996, 0.2121441662311554], "labels": ["child", "child", "child", "child", "jeans", "jeans", "jeans", "photo"]}, {"id": "VS_chart_1_1_1_5", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_1_15_1_0", "boxes": [[2, 1, 986, 751], [91, 86, 955, 570], [156, 166, 932, 569]], "scores": [0.37226516008377075, 0.4514930844306946, 0.25988197326660156], "labels": ["graph", "graph", "graph"]}, {"id": "VS_table_2_7_2_4", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_1_9_1_0", "boxes": [[-1, -2, 762, 649], [15, 22, 671, 594], [4, 21, 750, 600], [1, 8, 761, 788]], "scores": [0.2009141743183136, 0.4125623106956482, 0.525603711605072, 0.28959617018699646], "labels": ["map", "map", "map", "map"]}, {"id": "VD_illusion_1_9_0_1", "boxes": [[24, 10, 786, 971], [283, 723, 566, 977]], "scores": [0.9254916310310364, 0.22013528645038605], "labels": ["shape", "triangle"]}, {"id": "VD_math_1_7_0_1", "boxes": [[1, 1, 495, 495], [166, 2, 479, 321], [311, 149, 333, 176], [29, 6, 478, 328], [29, 7, 479, 485], [29, 187, 171, 322], [172, 188, 340, 323], [172, 286, 202, 318], [29, 187, 346, 487], [173, 316, 340, 484]], "scores": [0.2974092364311218, 0.6903642416000366, 0.2331201285123825, 0.22238896787166595, 0.5276613831520081, 0.43903011083602905, 0.30950576066970825, 0.2921477258205414, 0.23506124317646027, 0.48260927200317383], "labels": ["shape", "shape", "triangle", "shape", "shape", "square", "shape", "square", "shape", "square"]}, {"id": "VD_ocr_1_18_0_0", "boxes": [[0, 3, 488, 374], [378, 66, 459, 249], [113, 26, 209, 369], [43, 69, 124, 238], [192, 29, 327, 363], [43, 70, 126, 372], [300, 36, 399, 369]], "scores": [0.40760064125061035, 0.24838373064994812, 0.2454756498336792, 0.2448863685131073, 0.24765776097774506, 0.23886339366436005, 0.26264479756355286], "labels": ["drama", "person", "person", "person", "person", "person", "person"]}, {"id": "VD_video_2_5_1_2", "boxes": [[774, 8, 1025, 562], [1335, 20, 1611, 560], [173, 8, 411, 562], [666, 7, 1062, 560], [1429, 346, 1577, 554], [199, 403, 315, 556], [813, 383, 969, 554], [5, 7, 1736, 578]], "scores": [0.21150405704975128, 0.5106663703918457, 0.5478421449661255, 0.5599082708358765, 0.4757132828235626, 0.4065094292163849, 0.47630974650382996, 0.2121441662311554], "labels": ["child", "child", "child", "child", "jeans", "jeans", "jeans", "photo"]}, {"id": "VS_chart_1_1_1_6", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_1_15_1_1", "boxes": [[2, 1, 986, 751], [91, 86, 955, 570], [156, 166, 932, 569]], "scores": [0.37226516008377075, 0.4514930844306946, 0.25988197326660156], "labels": ["graph", "graph", "graph"]}, {"id": "VS_table_2_7_2_5", "boxes": [[619, 453, 636, 477], [409, 549, 426, 574], [619, 549, 636, 573], [830, 549, 847, 573]], "scores": [0.21691906452178955, 0.20572984218597412, 0.20994484424591064, 0.2039148062467575], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_1_9_1_1", "boxes": [[-1, -2, 762, 649], [15, 22, 671, 594], [4, 21, 750, 600], [1, 8, 761, 788]], "scores": [0.2009141743183136, 0.4125623106956482, 0.525603711605072, 0.28959617018699646], "labels": ["map", "map", "map", "map"]}, {"id": "VD_illusion_2_9_1_0", "boxes": [[22, 13, 726, 902]], "scores": [0.724507212638855], "labels": ["triangle"]}, {"id": "VD_math_1_7_0_2", "boxes": [[1, 1, 495, 495], [166, 2, 479, 321], [311, 149, 333, 176], [29, 6, 478, 328], [29, 7, 479, 485], [29, 187, 171, 322], [172, 188, 340, 323], [172, 286, 202, 318], [29, 187, 346, 487], [173, 316, 340, 484]], "scores": [0.2974092364311218, 0.6903642416000366, 0.2331201285123825, 0.22238896787166595, 0.5276613831520081, 0.43903011083602905, 0.30950576066970825, 0.2921477258205414, 0.23506124317646027, 0.48260927200317383], "labels": ["shape", "shape", "triangle", "shape", "shape", "square", "shape", "square", "shape", "square"]}, {"id": "VD_ocr_1_18_0_1", "boxes": [[0, 3, 488, 374], [378, 66, 459, 249], [113, 26, 209, 369], [43, 69, 124, 238], [192, 29, 327, 363], [43, 70, 126, 372], [300, 36, 399, 369]], "scores": [0.40760064125061035, 0.24838373064994812, 0.2454756498336792, 0.2448863685131073, 0.24765776097774506, 0.23886339366436005, 0.26264479756355286], "labels": ["drama", "person", "person", "person", "person", "person", "person"]}, {"id": "VD_video_2_5_1_3", "boxes": [[774, 8, 1025, 562], [1335, 20, 1611, 560], [173, 8, 411, 562], [666, 7, 1062, 560], [1429, 346, 1577, 554], [199, 403, 315, 556], [813, 383, 969, 554], [5, 7, 1736, 578]], "scores": [0.21150405704975128, 0.5106663703918457, 0.5478421449661255, 0.5599082708358765, 0.4757132828235626, 0.4065094292163849, 0.47630974650382996, 0.2121441662311554], "labels": ["child", "child", "child", "child", "jeans", "jeans", "jeans", "photo"]}, {"id": "VS_chart_1_1_1_7", "boxes": [[2, 4, 1300, 1040], [268, 183, 1008, 922], [288, 233, 1019, 926], [410, 948, 434, 978], [614, 948, 637, 979]], "scores": [0.37773260474205017, 0.41211244463920593, 0.2080252468585968, 0.22794753313064575, 0.21767394244670868], "labels": ["map", "graph", "plot", "number", "number"]}, {"id": "VS_chart_2_15_2_0", "boxes": [[2, 1, 987, 752], [102, 84, 954, 570], [172, 260, 940, 575], [110, 566, 118, 576]], "scores": [0.36597761511802673, 0.43472638726234436, 0.24557450413703918, 0.20260660350322723], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_2_7_3_0", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_2_9_2_0", "boxes": [[16, 21, 648, 593], [2, 23, 761, 603], [1, 8, 761, 788]], "scores": [0.3820439875125885, 0.5312814712524414, 0.3000256419181824], "labels": ["map", "map", "map"]}, {"id": "VD_illusion_2_9_1_1", "boxes": [[22, 13, 726, 902]], "scores": [0.724507212638855], "labels": ["triangle"]}, {"id": "VD_math_2_7_1_0", "boxes": [[183, 1, 506, 328], [4, 0, 503, 332], [331, 155, 357, 186], [1, 143, 191, 327], [158, 169, 335, 323], [0, 0, 517, 548], [3, 1, 506, 536], [158, 319, 341, 534]], "scores": [0.6760486960411072, 0.41743505001068115, 0.22102607786655426, 0.5887972116470337, 0.28357380628585815, 0.23172566294670105, 0.4583894610404968, 0.5595564842224121], "labels": ["shape", "shape", "triangle", "shape", "shape", "shape", "shape", "shape"]}, {"id": "VD_ocr_2_18_1_0", "boxes": [[1, 4, 505, 385], [199, 28, 323, 222], [306, 36, 410, 251], [46, 71, 128, 241], [391, 68, 473, 255], [119, 27, 220, 371], [46, 72, 131, 381], [310, 37, 411, 378]], "scores": [0.4570632576942444, 0.2622416317462921, 0.252458781003952, 0.26077622175216675, 0.2607741355895996, 0.24581214785575867, 0.2068369835615158, 0.2207636535167694], "labels": ["drama", "person", "person", "person", "person", "person", "person", "person"]}, {"id": "VD_video_1_6_0_0", "boxes": [[487, 22, 577, 568], [829, 23, 914, 539], [154, 28, 246, 586], [0, 10, 1053, 768], [703, 449, 1051, 754], [20, 488, 341, 754], [18, 464, 1052, 767], [351, 472, 680, 760]], "scores": [0.6700153350830078, 0.6576653122901917, 0.6632409691810608, 0.48788848519325256, 0.3749304711818695, 0.3312027156352997, 0.25677716732025146, 0.3573992848396301], "labels": ["thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_15_2_1", "boxes": [[2, 1, 987, 752], [102, 84, 954, 570], [172, 260, 940, 575], [110, 566, 118, 576]], "scores": [0.36597761511802673, 0.43472638726234436, 0.24557450413703918, 0.20260660350322723], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_2_7_3_1", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_2_9_2_1", "boxes": [[16, 21, 648, 593], [2, 23, 761, 603], [1, 8, 761, 788]], "scores": [0.3820439875125885, 0.5312814712524414, 0.3000256419181824], "labels": ["map", "map", "map"]}, {"id": "VD_illusion_1_10_0_0", "boxes": [[2, -1, 594, 450], [370, 26, 509, 256], [33, 109, 565, 433]], "scores": [0.22093693912029266, 0.5266229510307312, 0.6836827397346497], "labels": ["illustration", "cylinder", "chessboard"]}, {"id": "VD_math_2_7_1_1", "boxes": [[183, 1, 506, 328], [4, 0, 503, 332], [331, 155, 357, 186], [1, 143, 191, 327], [158, 169, 335, 323], [0, 0, 517, 548], [3, 1, 506, 536], [158, 319, 341, 534]], "scores": [0.6760486960411072, 0.41743505001068115, 0.22102607786655426, 0.5887972116470337, 0.28357380628585815, 0.23172566294670105, 0.4583894610404968, 0.5595564842224121], "labels": ["shape", "shape", "triangle", "shape", "shape", "shape", "shape", "shape"]}, {"id": "VD_ocr_2_18_1_1", "boxes": [[1, 4, 505, 385], [199, 28, 323, 222], [306, 36, 410, 251], [46, 71, 128, 241], [391, 68, 473, 255], [119, 27, 220, 371], [46, 72, 131, 381], [310, 37, 411, 378]], "scores": [0.4570632576942444, 0.2622416317462921, 0.252458781003952, 0.26077622175216675, 0.2607741355895996, 0.24581214785575867, 0.2068369835615158, 0.2207636535167694], "labels": ["drama", "person", "person", "person", "person", "person", "person", "person"]}, {"id": "VD_video_1_6_0_1", "boxes": [[487, 22, 577, 568], [829, 23, 914, 539], [154, 28, 246, 586], [0, 10, 1053, 768], [703, 449, 1051, 754], [20, 488, 341, 754], [18, 464, 1052, 767], [351, 472, 680, 760]], "scores": [0.6700153350830078, 0.6576653122901917, 0.6632409691810608, 0.48788848519325256, 0.3749304711818695, 0.3312027156352997, 0.25677716732025146, 0.3573992848396301], "labels": ["thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_16_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_7_3_2", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_0_10_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_10_0_1", "boxes": [[2, -1, 594, 450], [370, 26, 509, 256], [33, 109, 565, 433]], "scores": [0.22093693912029266, 0.5266229510307312, 0.6836827397346497], "labels": ["illustration", "cylinder", "chessboard"]}, {"id": "VD_math_2_7_1_2", "boxes": [[183, 1, 506, 328], [4, 0, 503, 332], [331, 155, 357, 186], [1, 143, 191, 327], [158, 169, 335, 323], [0, 0, 517, 548], [3, 1, 506, 536], [158, 319, 341, 534]], "scores": [0.6760486960411072, 0.41743505001068115, 0.22102607786655426, 0.5887972116470337, 0.28357380628585815, 0.23172566294670105, 0.4583894610404968, 0.5595564842224121], "labels": ["shape", "shape", "triangle", "shape", "shape", "shape", "shape", "shape"]}, {"id": "VD_ocr_1_19_0_0", "boxes": [[26, 126, 181, 250]], "scores": [0.5160567760467529], "labels": ["group photo"]}, {"id": "VD_video_1_6_0_2", "boxes": [[487, 22, 577, 568], [829, 23, 914, 539], [154, 28, 246, 586], [0, 10, 1053, 768], [703, 449, 1051, 754], [20, 488, 341, 754], [18, 464, 1052, 767], [351, 472, 680, 760]], "scores": [0.6700153350830078, 0.6576653122901917, 0.6632409691810608, 0.48788848519325256, 0.3749304711818695, 0.3312027156352997, 0.25677716732025146, 0.3573992848396301], "labels": ["thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_16_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_7_3_3", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_0_10_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_10_1_0", "boxes": [[2, -1, 593, 451], [374, 24, 514, 253], [257, 216, 357, 272], [39, 108, 568, 432]], "scores": [0.23170141875743866, 0.5406856536865234, 0.32793325185775757, 0.7009625434875488], "labels": ["illustration", "cylinder", "square", "chessboard"]}, {"id": "VD_math_1_8_0_0", "boxes": [[172, 0, 191, 31], [-2, 0, 370, 308], [268, 38, 306, 79], [55, 35, 310, 274], [17, 138, 38, 168], [57, 234, 97, 274], [267, 234, 306, 274]], "scores": [0.28088507056236267, 0.40849101543426514, 0.40935108065605164, 0.5783409476280212, 0.2994343042373657, 0.32573646306991577, 0.31850627064704895], "labels": ["number", "rectangle", "square", "square", "number", "square", "square"]}, {"id": "VD_ocr_1_19_0_1", "boxes": [[26, 126, 181, 250]], "scores": [0.5160567760467529], "labels": ["group photo"]}, {"id": "VD_video_1_6_0_3", "boxes": [[487, 22, 577, 568], [829, 23, 914, 539], [154, 28, 246, 586], [0, 10, 1053, 768], [703, 449, 1051, 754], [20, 488, 341, 754], [18, 464, 1052, 767], [351, 472, 680, 760]], "scores": [0.6700153350830078, 0.6576653122901917, 0.6632409691810608, 0.48788848519325256, 0.3749304711818695, 0.3312027156352997, 0.25677716732025146, 0.3573992848396301], "labels": ["thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_16_1_0", "boxes": [[8, -1, 929, 697], [90, 110, 546, 577], [205, 228, 437, 469], [639, 263, 907, 448]], "scores": [0.431403249502182, 0.41381755471229553, 0.49653056263923645, 0.2718586027622223], "labels": ["graph", "circle", "circle", "graph"]}, {"id": "VS_table_2_7_3_4", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_1_10_1_0", "boxes": [[0, 3, 581, 409], [45, 38, 545, 364], [44, 43, 486, 364]], "scores": [0.3426244258880615, 0.5451511144638062, 0.30156224966049194], "labels": ["map", "map", "continent"]}, {"id": "VD_illusion_2_10_1_1", "boxes": [[2, -1, 593, 451], [374, 24, 514, 253], [257, 216, 357, 272], [39, 108, 568, 432]], "scores": [0.23170141875743866, 0.5406856536865234, 0.32793325185775757, 0.7009625434875488], "labels": ["illustration", "cylinder", "square", "chessboard"]}, {"id": "VD_math_1_8_0_1", "boxes": [[172, 0, 191, 31], [-2, 0, 370, 308], [268, 38, 306, 79], [55, 35, 310, 274], [17, 138, 38, 168], [57, 234, 97, 274], [267, 234, 306, 274]], "scores": [0.28088507056236267, 0.40849101543426514, 0.40935108065605164, 0.5783409476280212, 0.2994343042373657, 0.32573646306991577, 0.31850627064704895], "labels": ["number", "rectangle", "square", "square", "number", "square", "square"]}, {"id": "VD_ocr_2_19_1_0", "boxes": [[129, 130, 169, 155]], "scores": [0.2130771279335022], "labels": ["poster"]}, {"id": "VD_video_2_6_1_0", "boxes": [[511, 19, 600, 565], [151, 21, 237, 537], [900, 53, 938, 572], [862, 30, 954, 589], [2, 9, 1058, 766], [21, 446, 363, 750], [375, 470, 708, 758], [16, 465, 1054, 763], [729, 486, 1056, 753]], "scores": [0.6669855713844299, 0.6331871151924133, 0.20063455402851105, 0.6892991065979004, 0.5199108123779297, 0.351874977350235, 0.33606600761413574, 0.25724855065345764, 0.33448633551597595], "labels": ["thermometer", "thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_4", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_16_1_1", "boxes": [[8, -1, 929, 697], [90, 110, 546, 577], [205, 228, 437, 469], [639, 263, 907, 448]], "scores": [0.431403249502182, 0.41381755471229553, 0.49653056263923645, 0.2718586027622223], "labels": ["graph", "circle", "circle", "graph"]}, {"id": "VS_table_2_7_3_5", "boxes": [[359, 318, 376, 342], [569, 317, 586, 342], [780, 318, 797, 342], [780, 414, 798, 439]], "scores": [0.22188609838485718, 0.23144122958183289, 0.22357282042503357, 0.201569065451622], "labels": ["number", "number", "number", "number"]}, {"id": "VS_map_1_10_1_1", "boxes": [[0, 3, 581, 409], [45, 38, 545, 364], [44, 43, 486, 364]], "scores": [0.3426244258880615, 0.5451511144638062, 0.30156224966049194], "labels": ["map", "map", "continent"]}, {"id": "VD_illusion_1_11_0_0", "boxes": [[37, 124, 219, 307], [341, 155, 459, 274], [82, 168, 175, 262], [40, 120, 461, 304], [357, 169, 447, 261]], "scores": [0.6070772409439087, 0.5777572989463806, 0.31801193952560425, 0.2088954895734787, 0.24505388736724854], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_1_8_0_2", "boxes": [[172, 0, 191, 31], [-2, 0, 370, 308], [268, 38, 306, 79], [55, 35, 310, 274], [17, 138, 38, 168], [57, 234, 97, 274], [267, 234, 306, 274]], "scores": [0.28088507056236267, 0.40849101543426514, 0.40935108065605164, 0.5783409476280212, 0.2994343042373657, 0.32573646306991577, 0.31850627064704895], "labels": ["number", "rectangle", "square", "square", "number", "square", "square"]}, {"id": "VD_ocr_2_19_1_1", "boxes": [[129, 130, 169, 155]], "scores": [0.2130771279335022], "labels": ["poster"]}, {"id": "VD_video_2_6_1_1", "boxes": [[511, 19, 600, 565], [151, 21, 237, 537], [900, 53, 938, 572], [862, 30, 954, 589], [2, 9, 1058, 766], [21, 446, 363, 750], [375, 470, 708, 758], [16, 465, 1054, 763], [729, 486, 1056, 753]], "scores": [0.6669855713844299, 0.6331871151924133, 0.20063455402851105, 0.6892991065979004, 0.5199108123779297, 0.351874977350235, 0.33606600761413574, 0.25724855065345764, 0.33448633551597595], "labels": ["thermometer", "thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_5", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_16_2_0", "boxes": [[8, 0, 920, 699], [79, 107, 535, 575], [193, 224, 425, 466], [631, 261, 895, 444]], "scores": [0.4338182508945465, 0.42602649331092834, 0.501087486743927, 0.27270472049713135], "labels": ["graph", "circle", "circle", "graph"]}, {"id": "VS_table_0_8_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_10_2_0", "boxes": [[0, 3, 580, 410], [45, 43, 480, 364], [43, 39, 538, 365], [39, 143, 558, 150]], "scores": [0.3495638370513916, 0.21252551674842834, 0.574690580368042, 0.2232644259929657], "labels": ["map", "continent", "map", "line"]}, {"id": "VD_illusion_1_11_0_1", "boxes": [[37, 124, 219, 307], [341, 155, 459, 274], [82, 168, 175, 262], [40, 120, 461, 304], [357, 169, 447, 261]], "scores": [0.6070772409439087, 0.5777572989463806, 0.31801193952560425, 0.2088954895734787, 0.24505388736724854], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_2_8_1_0", "boxes": [[88, 88, 493, 352]], "scores": [0.5228543877601624], "labels": ["triangle"]}, {"id": "VD_ocr_1_20_0_0", "boxes": [[40, 196, 347, 478], [322, 194, 575, 350], [250, 288, 576, 485], [27, 370, 583, 568]], "scores": [0.31381794810295105, 0.2806321084499359, 0.26889216899871826, 0.2411382645368576], "labels": ["pastry", "mint", "pastry", "plate"]}, {"id": "VD_video_2_6_1_2", "boxes": [[511, 19, 600, 565], [151, 21, 237, 537], [900, 53, 938, 572], [862, 30, 954, 589], [2, 9, 1058, 766], [21, 446, 363, 750], [375, 470, 708, 758], [16, 465, 1054, 763], [729, 486, 1056, 753]], "scores": [0.6669855713844299, 0.6331871151924133, 0.20063455402851105, 0.6892991065979004, 0.5199108123779297, 0.351874977350235, 0.33606600761413574, 0.25724855065345764, 0.33448633551597595], "labels": ["thermometer", "thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_6", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_16_2_1", "boxes": [[8, 0, 920, 699], [79, 107, 535, 575], [193, 224, 425, 466], [631, 261, 895, 444]], "scores": [0.4338182508945465, 0.42602649331092834, 0.501087486743927, 0.27270472049713135], "labels": ["graph", "circle", "circle", "graph"]}, {"id": "VS_table_0_8_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_10_2_1", "boxes": [[0, 3, 580, 410], [45, 43, 480, 364], [43, 39, 538, 365], [39, 143, 558, 150]], "scores": [0.3495638370513916, 0.21252551674842834, 0.574690580368042, 0.2232644259929657], "labels": ["map", "continent", "map", "line"]}, {"id": "VD_illusion_1_11_0_2", "boxes": [[37, 124, 219, 307], [341, 155, 459, 274], [82, 168, 175, 262], [40, 120, 461, 304], [357, 169, 447, 261]], "scores": [0.6070772409439087, 0.5777572989463806, 0.31801193952560425, 0.2088954895734787, 0.24505388736724854], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_2_8_1_1", "boxes": [[88, 88, 493, 352]], "scores": [0.5228543877601624], "labels": ["triangle"]}, {"id": "VD_ocr_1_20_0_1", "boxes": [[40, 196, 347, 478], [322, 194, 575, 350], [250, 288, 576, 485], [27, 370, 583, 568]], "scores": [0.31381794810295105, 0.2806321084499359, 0.26889216899871826, 0.2411382645368576], "labels": ["pastry", "mint", "pastry", "plate"]}, {"id": "VD_video_2_6_1_3", "boxes": [[511, 19, 600, 565], [151, 21, 237, 537], [900, 53, 938, 572], [862, 30, 954, 589], [2, 9, 1058, 766], [21, 446, 363, 750], [375, 470, 708, 758], [16, 465, 1054, 763], [729, 486, 1056, 753]], "scores": [0.6669855713844299, 0.6331871151924133, 0.20063455402851105, 0.6892991065979004, 0.5199108123779297, 0.351874977350235, 0.33606600761413574, 0.25724855065345764, 0.33448633551597595], "labels": ["thermometer", "thermometer", "thermometer", "thermometer", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_chart_0_2_0_7", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_17_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_8_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_0_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_11_1_0", "boxes": [[54, 94, 234, 276], [356, 125, 475, 244], [372, 139, 463, 232], [106, 144, 183, 226], [57, 91, 477, 274]], "scores": [0.5894169807434082, 0.5556641221046448, 0.2313496321439743, 0.28692105412483215, 0.20025362074375153], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_2_8_1_2", "boxes": [[88, 88, 493, 352]], "scores": [0.5228543877601624], "labels": ["triangle"]}, {"id": "VD_ocr_2_20_1_0", "boxes": [[38, 200, 354, 490], [332, 200, 588, 359], [255, 296, 590, 498], [23, 381, 598, 583]], "scores": [0.3123084008693695, 0.29148757457733154, 0.27008336782455444, 0.2554249167442322], "labels": ["pastry", "mint", "pastry", "plate"]}, {"id": "VD_video_1_7_0_0", "boxes": [[733, 5, 1025, 328], [79, 10, 503, 358], [1358, 8, 1502, 327], [696, 1, 1028, 380], [22, 205, 169, 408], [676, 199, 830, 411], [674, 199, 975, 409], [219, 256, 412, 373], [710, 266, 791, 341], [1287, 204, 1453, 418], [219, 278, 274, 334], [760, 281, 818, 339]], "scores": [0.303181916475296, 0.3773871064186096, 0.2636565566062927, 0.2633042633533478, 0.5666344165802002, 0.580400288105011, 0.3132399022579193, 0.3139100670814514, 0.2787001132965088, 0.33315736055374146, 0.2077069878578186, 0.23065535724163055], "labels": ["cable", "cable", "cable", "cable", "socket", "socket", "socket", "plug", "connect", "socket", "connector", "connector"]}, {"id": "VS_chart_1_2_1_0", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_0_17_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_8_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_0_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_11_1_1", "boxes": [[54, 94, 234, 276], [356, 125, 475, 244], [372, 139, 463, 232], [106, 144, 183, 226], [57, 91, 477, 274]], "scores": [0.5894169807434082, 0.5556641221046448, 0.2313496321439743, 0.28692105412483215, 0.20025362074375153], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_1_9_0_0", "boxes": [[25, 24, 235, 251], [113, 109, 133, 131]], "scores": [0.529377281665802, 0.23372882604599], "labels": ["circle", "circle"]}, {"id": "VD_ocr_2_20_1_1", "boxes": [[38, 200, 354, 490], [332, 200, 588, 359], [255, 296, 590, 498], [23, 381, 598, 583]], "scores": [0.3123084008693695, 0.29148757457733154, 0.27008336782455444, 0.2554249167442322], "labels": ["pastry", "mint", "pastry", "plate"]}, {"id": "VD_video_1_7_0_1", "boxes": [[733, 5, 1025, 328], [79, 10, 503, 358], [1358, 8, 1502, 327], [696, 1, 1028, 380], [22, 205, 169, 408], [676, 199, 830, 411], [674, 199, 975, 409], [219, 256, 412, 373], [710, 266, 791, 341], [1287, 204, 1453, 418], [219, 278, 274, 334], [760, 281, 818, 339]], "scores": [0.303181916475296, 0.3773871064186096, 0.2636565566062927, 0.2633042633533478, 0.5666344165802002, 0.580400288105011, 0.3132399022579193, 0.3139100670814514, 0.2787001132965088, 0.33315736055374146, 0.2077069878578186, 0.23065535724163055], "labels": ["cable", "cable", "cable", "cable", "socket", "socket", "socket", "plug", "connect", "socket", "connector", "connector"]}, {"id": "VS_chart_1_2_1_1", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_0_17_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_8_0_4", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_0_1_0", "boxes": [[274, 7, 307, 80], [396, 7, 432, 81], [184, 6, 225, 81], [327, 7, 372, 80], [455, 6, 493, 81], [17, 173, 50, 224]], "scores": [0.3115593492984772, 0.27353614568710327, 0.2331414371728897, 0.3268182575702667, 0.2615271210670471, 0.20510555803775787], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VD_illusion_2_11_1_2", "boxes": [[54, 94, 234, 276], [356, 125, 475, 244], [372, 139, 463, 232], [106, 144, 183, 226], [57, 91, 477, 274]], "scores": [0.5894169807434082, 0.5556641221046448, 0.2313496321439743, 0.28692105412483215, 0.20025362074375153], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_math_1_9_0_1", "boxes": [[25, 24, 235, 251], [113, 109, 133, 131]], "scores": [0.529377281665802, 0.23372882604599], "labels": ["circle", "circle"]}, {"id": "VD_figure_1_0_0_0", "boxes": [[4, 2, 1305, 897], [832, 140, 904, 313], [843, -3, 1133, 504], [5, 236, 436, 391], [99, 292, 138, 463], [98, 195, 381, 795], [431, 243, 786, 783], [1031, 308, 1254, 885], [50, 338, 465, 510], [721, 236, 1071, 732], [318, 397, 461, 497], [3, 314, 214, 716], [324, 399, 630, 635], [311, 294, 426, 671], [5, 458, 216, 713], [932, 321, 1317, 818]], "scores": [0.631056547164917, 0.303212970495224, 0.21869544684886932, 0.4284714460372925, 0.20659829676151276, 0.23105856776237488, 0.26237642765045166, 0.24337910115718842, 0.21127581596374512, 0.4582611620426178, 0.29782116413116455, 0.27542781829833984, 0.30079713463783264, 0.20516259968280792, 0.23240478336811066, 0.38158318400382996], "labels": ["illustration", "spear", "figurine", "ax", "spear", "figurine", "illustration", "child", "spear", "donkey", "sword", "broom", "ax", "broom", "broom", "spear"]}, {"id": "VD_video_1_7_0_2", "boxes": [[733, 5, 1025, 328], [79, 10, 503, 358], [1358, 8, 1502, 327], [696, 1, 1028, 380], [22, 205, 169, 408], [676, 199, 830, 411], [674, 199, 975, 409], [219, 256, 412, 373], [710, 266, 791, 341], [1287, 204, 1453, 418], [219, 278, 274, 334], [760, 281, 818, 339]], "scores": [0.303181916475296, 0.3773871064186096, 0.2636565566062927, 0.2633042633533478, 0.5666344165802002, 0.580400288105011, 0.3132399022579193, 0.3139100670814514, 0.2787001132965088, 0.33315736055374146, 0.2077069878578186, 0.23065535724163055], "labels": ["cable", "cable", "cable", "cable", "socket", "socket", "socket", "plug", "connect", "socket", "connector", "connector"]}, {"id": "VS_chart_1_2_1_2", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_1_17_1_0", "boxes": [[741, 522, 975, 861], [1207, 591, 1465, 929], [176, 768, 233, 842], [764, 570, 969, 868], [1210, 609, 1443, 926], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1424], [2078, 1630, 2322, 1971], [2982, 2086, 3210, 2406], [3416, 2187, 3645, 2504], [3857, 2190, 4121, 2534], [2550, 2109, 2761, 2372], [2516, 2042, 2760, 2371], [3004, 2113, 3217, 2414], [3459, 2231, 3661, 2529], [4315, 2351, 4571, 2685], [4786, 2395, 5027, 2733], [3860, 2191, 4125, 2537], [4338, 2416, 4566, 2693], [4813, 2471, 5015, 2747], [104, 1804, 4895, 4481], [395, 3677, 456, 3752]], "scores": [0.3204730153083801, 0.37242603302001953, 0.23759454488754272, 0.43034496903419495, 0.36017072200775146, 0.30831775069236755, 0.407871812582016, 0.23960663378238678, 0.2600855827331543, 0.25338229537010193, 0.24892236292362213, 0.2305223047733307, 0.20275822281837463, 0.419148325920105, 0.40765368938446045, 0.2720944881439209, 0.21971629559993744, 0.268945574760437, 0.37121009826660156, 0.33251044154167175, 0.2987723648548126, 0.20402339100837708], "labels": ["face", "face", "number", "face", "face", "face", "face", "man", "face", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_0_8_0_5", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_0_1_1", "boxes": [[274, 7, 307, 80], [396, 7, 432, 81], [184, 6, 225, 81], [327, 7, 372, 80], [455, 6, 493, 81], [17, 173, 50, 224]], "scores": [0.3115593492984772, 0.27353614568710327, 0.2331414371728897, 0.3268182575702667, 0.2615271210670471, 0.20510555803775787], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VD_illusion_1_12_0_0", "boxes": [[7, 3, 769, 739], [263, 84, 401, 205], [69, 83, 602, 613]], "scores": [0.22488060593605042, 0.2964465022087097, 0.437607079744339], "labels": ["illustration", "triangle", "symbol"]}, {"id": "VD_math_1_9_0_2", "boxes": [[25, 24, 235, 251], [113, 109, 133, 131]], "scores": [0.529377281665802, 0.23372882604599], "labels": ["circle", "circle"]}, {"id": "VD_figure_2_0_1_0", "boxes": [[993, 13, 1285, 573], [1010, 120, 1277, 529], [591, 282, 932, 833], [870, 273, 1212, 782], [1184, 348, 1415, 940], [158, 259, 540, 774]], "scores": [0.25677651166915894, 0.2253587245941162, 0.30577269196510315, 0.3742283880710602, 0.3271706700325012, 0.45145490765571594], "labels": ["witch", "costume", "witch", "horse", "witch", "witch"]}, {"id": "VD_video_1_7_0_3", "boxes": [[733, 5, 1025, 328], [79, 10, 503, 358], [1358, 8, 1502, 327], [696, 1, 1028, 380], [22, 205, 169, 408], [676, 199, 830, 411], [674, 199, 975, 409], [219, 256, 412, 373], [710, 266, 791, 341], [1287, 204, 1453, 418], [219, 278, 274, 334], [760, 281, 818, 339]], "scores": [0.303181916475296, 0.3773871064186096, 0.2636565566062927, 0.2633042633533478, 0.5666344165802002, 0.580400288105011, 0.3132399022579193, 0.3139100670814514, 0.2787001132965088, 0.33315736055374146, 0.2077069878578186, 0.23065535724163055], "labels": ["cable", "cable", "cable", "cable", "socket", "socket", "socket", "plug", "connect", "socket", "connector", "connector"]}, {"id": "VS_chart_1_2_1_3", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_1_17_1_1", "boxes": [[741, 522, 975, 861], [1207, 591, 1465, 929], [176, 768, 233, 842], [764, 570, 969, 868], [1210, 609, 1443, 926], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1424], [2078, 1630, 2322, 1971], [2982, 2086, 3210, 2406], [3416, 2187, 3645, 2504], [3857, 2190, 4121, 2534], [2550, 2109, 2761, 2372], [2516, 2042, 2760, 2371], [3004, 2113, 3217, 2414], [3459, 2231, 3661, 2529], [4315, 2351, 4571, 2685], [4786, 2395, 5027, 2733], [3860, 2191, 4125, 2537], [4338, 2416, 4566, 2693], [4813, 2471, 5015, 2747], [104, 1804, 4895, 4481], [395, 3677, 456, 3752]], "scores": [0.3204730153083801, 0.37242603302001953, 0.23759454488754272, 0.43034496903419495, 0.36017072200775146, 0.30831775069236755, 0.407871812582016, 0.23960663378238678, 0.2600855827331543, 0.25338229537010193, 0.24892236292362213, 0.2305223047733307, 0.20275822281837463, 0.419148325920105, 0.40765368938446045, 0.2720944881439209, 0.21971629559993744, 0.268945574760437, 0.37121009826660156, 0.33251044154167175, 0.2987723648548126, 0.20402339100837708], "labels": ["face", "face", "number", "face", "face", "face", "face", "man", "face", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_1_8_1_0", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_2_0_2_0", "boxes": [[971, 23, 1057, 200], [675, 26, 759, 200], [455, 20, 557, 202], [828, 23, 929, 186], [1116, 19, 1212, 202], [46, 234, 134, 387], [1130, 235, 1217, 390], [2, 7, 1248, 914], [45, 426, 124, 552], [20, 45, 1230, 865]], "scores": [0.29380691051483154, 0.3118255138397217, 0.2627392113208771, 0.20475389063358307, 0.2867979109287262, 0.208632230758667, 0.20263703167438507, 0.29538407921791077, 0.216726154088974, 0.25091007351875305], "labels": ["number", "number", "number", "number", "number", "number", "number", "font", "number", "font"]}, {"id": "VD_illusion_2_12_1_0", "boxes": [[2, 4, 686, 639], [247, 54, 387, 177], [48, 55, 581, 591], [108, 287, 252, 415], [253, 465, 387, 592]], "scores": [0.20369356870651245, 0.38350415229797363, 0.49892503023147583, 0.20083630084991455, 0.21212433278560638], "labels": ["illustration", "triangle", "symbol", "triangle", "symbol"]}, {"id": "VD_math_2_9_1_0", "boxes": [[0, 2, 334, 333], [24, 39, 300, 315], [73, 295, 99, 326]], "scores": [0.21704241633415222, 0.6801661849021912, 0.22887884080410004], "labels": ["angle", "circle", "triangle"]}, {"id": "VD_figure_1_1_0_0", "boxes": [[287, 3, 337, 53], [38, 17, 504, 434]], "scores": [0.8615780472755432, 0.7586017847061157], "labels": ["basketball", "basketball player"]}, {"id": "VD_video_2_7_1_0", "boxes": [[738, 7, 1014, 348], [1330, 17, 1743, 367], [16, 8, 243, 436], [89, 13, 238, 336], [694, 4, 1012, 399], [667, 213, 821, 424], [1269, 216, 1415, 416], [665, 211, 971, 421], [700, 278, 782, 353], [1299, 277, 1381, 350], [20, 210, 189, 428]], "scores": [0.27578240633010864, 0.33433565497398376, 0.23009490966796875, 0.2540660798549652, 0.25264716148376465, 0.5496881008148193, 0.5548987984657288, 0.2063809484243393, 0.2821127474308014, 0.20149219036102295, 0.2822254002094269], "labels": ["wire", "wire", "connect", "wire", "connect", "socket", "socket", "socket", "connect", "plug", "socket"]}, {"id": "VS_chart_1_2_1_4", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_1_17_1_2", "boxes": [[741, 522, 975, 861], [1207, 591, 1465, 929], [176, 768, 233, 842], [764, 570, 969, 868], [1210, 609, 1443, 926], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1424], [2078, 1630, 2322, 1971], [2982, 2086, 3210, 2406], [3416, 2187, 3645, 2504], [3857, 2190, 4121, 2534], [2550, 2109, 2761, 2372], [2516, 2042, 2760, 2371], [3004, 2113, 3217, 2414], [3459, 2231, 3661, 2529], [4315, 2351, 4571, 2685], [4786, 2395, 5027, 2733], [3860, 2191, 4125, 2537], [4338, 2416, 4566, 2693], [4813, 2471, 5015, 2747], [104, 1804, 4895, 4481], [395, 3677, 456, 3752]], "scores": [0.3204730153083801, 0.37242603302001953, 0.23759454488754272, 0.43034496903419495, 0.36017072200775146, 0.30831775069236755, 0.407871812582016, 0.23960663378238678, 0.2600855827331543, 0.25338229537010193, 0.24892236292362213, 0.2305223047733307, 0.20275822281837463, 0.419148325920105, 0.40765368938446045, 0.2720944881439209, 0.21971629559993744, 0.268945574760437, 0.37121009826660156, 0.33251044154167175, 0.2987723648548126, 0.20402339100837708], "labels": ["face", "face", "number", "face", "face", "face", "face", "man", "face", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_1_8_1_1", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_2_0_2_1", "boxes": [[971, 23, 1057, 200], [675, 26, 759, 200], [455, 20, 557, 202], [828, 23, 929, 186], [1116, 19, 1212, 202], [46, 234, 134, 387], [1130, 235, 1217, 390], [2, 7, 1248, 914], [45, 426, 124, 552], [20, 45, 1230, 865]], "scores": [0.29380691051483154, 0.3118255138397217, 0.2627392113208771, 0.20475389063358307, 0.2867979109287262, 0.208632230758667, 0.20263703167438507, 0.29538407921791077, 0.216726154088974, 0.25091007351875305], "labels": ["number", "number", "number", "number", "number", "number", "number", "font", "number", "font"]}, {"id": "VD_illusion_1_13_0_0", "boxes": [[2, 0, 1190, 759]], "scores": [0.3320201337337494], "labels": ["pattern"]}, {"id": "VD_math_2_9_1_1", "boxes": [[0, 2, 334, 333], [24, 39, 300, 315], [73, 295, 99, 326]], "scores": [0.21704241633415222, 0.6801661849021912, 0.22887884080410004], "labels": ["angle", "circle", "triangle"]}, {"id": "VD_figure_1_1_0_1", "boxes": [[287, 3, 337, 53], [38, 17, 504, 434]], "scores": [0.8615780472755432, 0.7586017847061157], "labels": ["basketball", "basketball player"]}, {"id": "VD_video_2_7_1_1", "boxes": [[738, 7, 1014, 348], [1330, 17, 1743, 367], [16, 8, 243, 436], [89, 13, 238, 336], [694, 4, 1012, 399], [667, 213, 821, 424], [1269, 216, 1415, 416], [665, 211, 971, 421], [700, 278, 782, 353], [1299, 277, 1381, 350], [20, 210, 189, 428]], "scores": [0.27578240633010864, 0.33433565497398376, 0.23009490966796875, 0.2540660798549652, 0.25264716148376465, 0.5496881008148193, 0.5548987984657288, 0.2063809484243393, 0.2821127474308014, 0.20149219036102295, 0.2822254002094269], "labels": ["wire", "wire", "connect", "wire", "connect", "socket", "socket", "socket", "connect", "plug", "socket"]}, {"id": "VS_chart_1_2_1_5", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_2_17_2_0", "boxes": [[749, 516, 991, 864], [1208, 592, 1464, 930], [761, 562, 978, 864], [176, 768, 233, 842], [1211, 612, 1443, 927], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1423], [2078, 1630, 2322, 1971], [2982, 2086, 3209, 2406], [3417, 2187, 3646, 2505], [2550, 2108, 2761, 2372], [2515, 2042, 2760, 2371], [3004, 2112, 3217, 2414], [3459, 2230, 3662, 2529], [4314, 2352, 4571, 2685], [4770, 2390, 5007, 2719], [3860, 2190, 4125, 2537], [3892, 2232, 4126, 2534], [4338, 2420, 4567, 2692], [4791, 2439, 4995, 2723], [110, 1813, 4892, 4483], [395, 3677, 456, 3752]], "scores": [0.3128475248813629, 0.3638107180595398, 0.2955958843231201, 0.23968560993671417, 0.32079842686653137, 0.30102813243865967, 0.4035940170288086, 0.24273835122585297, 0.26334530115127563, 0.2546122968196869, 0.2411750853061676, 0.20471763610839844, 0.43622025847435, 0.42642444372177124, 0.27705180644989014, 0.23585113883018494, 0.26745739579200745, 0.2098681926727295, 0.38269051909446716, 0.34943366050720215, 0.301139235496521, 0.20530885457992554], "labels": ["man", "face", "face", "number", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "face", "face", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_1_8_1_2", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_0_1_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_13_1_0", "boxes": [[1, 2, 973, 653]], "scores": [0.31741487979888916], "labels": ["pattern"]}, {"id": "VD_math_2_9_1_2", "boxes": [[0, 2, 334, 333], [24, 39, 300, 315], [73, 295, 99, 326]], "scores": [0.21704241633415222, 0.6801661849021912, 0.22887884080410004], "labels": ["angle", "circle", "triangle"]}, {"id": "VD_figure_2_1_1_0", "boxes": [[474, 64, 561, 143], [24, 3, 551, 508]], "scores": [0.726597011089325, 0.6768003106117249], "labels": ["basketball", "basketball player"]}, {"id": "VD_video_2_7_1_2", "boxes": [[738, 7, 1014, 348], [1330, 17, 1743, 367], [16, 8, 243, 436], [89, 13, 238, 336], [694, 4, 1012, 399], [667, 213, 821, 424], [1269, 216, 1415, 416], [665, 211, 971, 421], [700, 278, 782, 353], [1299, 277, 1381, 350], [20, 210, 189, 428]], "scores": [0.27578240633010864, 0.33433565497398376, 0.23009490966796875, 0.2540660798549652, 0.25264716148376465, 0.5496881008148193, 0.5548987984657288, 0.2063809484243393, 0.2821127474308014, 0.20149219036102295, 0.2822254002094269], "labels": ["wire", "wire", "connect", "wire", "connect", "socket", "socket", "socket", "connect", "plug", "socket"]}, {"id": "VS_chart_1_2_1_6", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_2_17_2_1", "boxes": [[749, 516, 991, 864], [1208, 592, 1464, 930], [761, 562, 978, 864], [176, 768, 233, 842], [1211, 612, 1443, 927], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1423], [2078, 1630, 2322, 1971], [2982, 2086, 3209, 2406], [3417, 2187, 3646, 2505], [2550, 2108, 2761, 2372], [2515, 2042, 2760, 2371], [3004, 2112, 3217, 2414], [3459, 2230, 3662, 2529], [4314, 2352, 4571, 2685], [4770, 2390, 5007, 2719], [3860, 2190, 4125, 2537], [3892, 2232, 4126, 2534], [4338, 2420, 4567, 2692], [4791, 2439, 4995, 2723], [110, 1813, 4892, 4483], [395, 3677, 456, 3752]], "scores": [0.3128475248813629, 0.3638107180595398, 0.2955958843231201, 0.23968560993671417, 0.32079842686653137, 0.30102813243865967, 0.4035940170288086, 0.24273835122585297, 0.26334530115127563, 0.2546122968196869, 0.2411750853061676, 0.20471763610839844, 0.43622025847435, 0.42642444372177124, 0.27705180644989014, 0.23585113883018494, 0.26745739579200745, 0.2098681926727295, 0.38269051909446716, 0.34943366050720215, 0.301139235496521, 0.20530885457992554], "labels": ["man", "face", "face", "number", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "face", "face", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_1_8_1_3", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_1_1_1_0", "boxes": [[259, 26, 499, 148]], "scores": [0.4221480190753937], "labels": ["fly"]}, {"id": "VD_illusion_1_14_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_10_0_0", "boxes": [[243, 0, 275, 39], [0, 0, 590, 490], [54, 65, 528, 422]], "scores": [0.2845700681209564, 0.30386608839035034, 0.7126963138580322], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_1_1_1", "boxes": [[474, 64, 561, 143], [24, 3, 551, 508]], "scores": [0.726597011089325, 0.6768003106117249], "labels": ["basketball", "basketball player"]}, {"id": "VD_video_2_7_1_3", "boxes": [[738, 7, 1014, 348], [1330, 17, 1743, 367], [16, 8, 243, 436], [89, 13, 238, 336], [694, 4, 1012, 399], [667, 213, 821, 424], [1269, 216, 1415, 416], [665, 211, 971, 421], [700, 278, 782, 353], [1299, 277, 1381, 350], [20, 210, 189, 428]], "scores": [0.27578240633010864, 0.33433565497398376, 0.23009490966796875, 0.2540660798549652, 0.25264716148376465, 0.5496881008148193, 0.5548987984657288, 0.2063809484243393, 0.2821127474308014, 0.20149219036102295, 0.2822254002094269], "labels": ["wire", "wire", "connect", "wire", "connect", "socket", "socket", "socket", "connect", "plug", "socket"]}, {"id": "VS_chart_1_2_1_7", "boxes": [[0, 5, 968, 936], [156, 196, 826, 783], [135, 161, 829, 786], [91, 416, 129, 442], [91, 591, 130, 618], [91, 678, 130, 707], [109, 767, 130, 793]], "scores": [0.4332110285758972, 0.4159961938858032, 0.3936682641506195, 0.20653583109378815, 0.22162370383739471, 0.2047366201877594, 0.33943501114845276], "labels": ["graph", "graph", "graph", "number", "number", "number", "number"]}, {"id": "VS_chart_2_17_2_2", "boxes": [[749, 516, 991, 864], [1208, 592, 1464, 930], [761, 562, 978, 864], [176, 768, 233, 842], [1211, 612, 1443, 927], [1651, 1061, 1927, 1419], [1681, 1116, 1923, 1423], [2078, 1630, 2322, 1971], [2982, 2086, 3209, 2406], [3417, 2187, 3646, 2505], [2550, 2108, 2761, 2372], [2515, 2042, 2760, 2371], [3004, 2112, 3217, 2414], [3459, 2230, 3662, 2529], [4314, 2352, 4571, 2685], [4770, 2390, 5007, 2719], [3860, 2190, 4125, 2537], [3892, 2232, 4126, 2534], [4338, 2420, 4567, 2692], [4791, 2439, 4995, 2723], [110, 1813, 4892, 4483], [395, 3677, 456, 3752]], "scores": [0.3128475248813629, 0.3638107180595398, 0.2955958843231201, 0.23968560993671417, 0.32079842686653137, 0.30102813243865967, 0.4035940170288086, 0.24273835122585297, 0.26334530115127563, 0.2546122968196869, 0.2411750853061676, 0.20471763610839844, 0.43622025847435, 0.42642444372177124, 0.27705180644989014, 0.23585113883018494, 0.26745739579200745, 0.2098681926727295, 0.38269051909446716, 0.34943366050720215, 0.301139235496521, 0.20530885457992554], "labels": ["man", "face", "face", "number", "face", "face", "face", "man", "face", "face", "face", "man", "face", "face", "face", "face", "face", "face", "face", "face", "graph", "number"]}, {"id": "VS_table_1_8_1_4", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_2_1_2_0", "boxes": [[449, 83, 814, 269]], "scores": [0.4249372184276581], "labels": ["fly"]}, {"id": "VD_illusion_1_14_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_10_0_1", "boxes": [[243, 0, 275, 39], [0, 0, 590, 490], [54, 65, 528, 422]], "scores": [0.2845700681209564, 0.30386608839035034, 0.7126963138580322], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_2_0_0", "boxes": [[2, 6, 1451, 994], [923, 140, 1171, 317], [182, 210, 374, 381], [8, 210, 382, 632], [491, 299, 822, 751], [5, 222, 499, 838], [1104, 247, 1382, 737], [810, 272, 1180, 864], [48, 347, 505, 837], [0, 646, 1438, 1001]], "scores": [0.6811930537223816, 0.33512625098228455, 0.25474730134010315, 0.4142718017101288, 0.4496144950389862, 0.3811623156070709, 0.3975890576839447, 0.4091910123825073, 0.39351603388786316, 0.27556923031806946], "labels": ["grass", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "grass"]}, {"id": "VD_video_1_8_0_0", "boxes": [[13, 16, 95, 72], [356, 15, 425, 92], [357, 15, 457, 95], [711, 17, 785, 109], [713, 17, 803, 109], [255, 80, 281, 197], [12, 75, 84, 197], [357, 13, 464, 200], [261, 78, 335, 200], [356, 116, 427, 199], [598, 118, 685, 200], [783, 137, 814, 197], [427, 122, 463, 197], [713, 135, 781, 199], [959, 137, 1036, 197]], "scores": [0.26973050832748413, 0.21510468423366547, 0.36725762486457825, 0.22973497211933136, 0.31349706649780273, 0.22492137551307678, 0.3044678568840027, 0.2011730819940567, 0.36290812492370605, 0.35074275732040405, 0.3716704547405243, 0.22165875136852264, 0.23465465009212494, 0.3007154166698456, 0.2748212516307831], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "ramp", "building", "building"]}, {"id": "VS_chart_0_3_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_18_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_8_1_5", "boxes": [[67, 56, 1127, 88], [68, 143, 1104, 536], [912, 469, 929, 492]], "scores": [0.3399414122104645, 0.26298123598098755, 0.20527061820030212], "labels": ["text", "text", "number"]}, {"id": "VS_ocr_0_2_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_14_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_10_0_2", "boxes": [[243, 0, 275, 39], [0, 0, 590, 490], [54, 65, 528, 422]], "scores": [0.2845700681209564, 0.30386608839035034, 0.7126963138580322], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_2_0_1", "boxes": [[2, 6, 1451, 994], [923, 140, 1171, 317], [182, 210, 374, 381], [8, 210, 382, 632], [491, 299, 822, 751], [5, 222, 499, 838], [1104, 247, 1382, 737], [810, 272, 1180, 864], [48, 347, 505, 837], [0, 646, 1438, 1001]], "scores": [0.6811930537223816, 0.33512625098228455, 0.25474730134010315, 0.4142718017101288, 0.4496144950389862, 0.3811623156070709, 0.3975890576839447, 0.4091910123825073, 0.39351603388786316, 0.27556923031806946], "labels": ["grass", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "chicken", "grass"]}, {"id": "VD_video_1_8_0_1", "boxes": [[13, 16, 95, 72], [356, 15, 425, 92], [357, 15, 457, 95], [711, 17, 785, 109], [713, 17, 803, 109], [255, 80, 281, 197], [12, 75, 84, 197], [357, 13, 464, 200], [261, 78, 335, 200], [356, 116, 427, 199], [598, 118, 685, 200], [783, 137, 814, 197], [427, 122, 463, 197], [713, 135, 781, 199], [959, 137, 1036, 197]], "scores": [0.26973050832748413, 0.21510468423366547, 0.36725762486457825, 0.22973497211933136, 0.31349706649780273, 0.22492137551307678, 0.3044678568840027, 0.2011730819940567, 0.36290812492370605, 0.35074275732040405, 0.3716704547405243, 0.22165875136852264, 0.23465465009212494, 0.3007154166698456, 0.2748212516307831], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "ramp", "building", "building"]}, {"id": "VS_chart_0_3_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_18_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_8_2_0", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_0_2_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_14_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_10_1_0", "boxes": [[266, 1, 298, 40], [3, -1, 625, 482], [76, 66, 551, 423]], "scores": [0.30672135949134827, 0.30381301045417786, 0.7188193798065186], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_2_1_0", "boxes": [[303, 45, 385, 100], [2, 2, 480, 325], [61, 69, 122, 125], [0, 69, 139, 268], [306, 88, 364, 102], [2, 68, 124, 212], [1, 70, 164, 275], [260, 106, 384, 286], [373, 80, 457, 242], [161, 98, 253, 254], [34, 115, 168, 275]], "scores": [0.6177960634231567, 0.526031494140625, 0.3823704421520233, 0.20026849210262299, 0.4416334331035614, 0.520195722579956, 0.5270238518714905, 0.6718800663948059, 0.6114940047264099, 0.6317269206047058, 0.5088797211647034], "labels": ["duckling", "grass", "duckling", "duckling", "duckling", "duckling", "duckling", "duck", "duckling", "duckling", "duckling"]}, {"id": "VD_video_1_8_0_2", "boxes": [[13, 16, 95, 72], [356, 15, 425, 92], [357, 15, 457, 95], [711, 17, 785, 109], [713, 17, 803, 109], [255, 80, 281, 197], [12, 75, 84, 197], [357, 13, 464, 200], [261, 78, 335, 200], [356, 116, 427, 199], [598, 118, 685, 200], [783, 137, 814, 197], [427, 122, 463, 197], [713, 135, 781, 199], [959, 137, 1036, 197]], "scores": [0.26973050832748413, 0.21510468423366547, 0.36725762486457825, 0.22973497211933136, 0.31349706649780273, 0.22492137551307678, 0.3044678568840027, 0.2011730819940567, 0.36290812492370605, 0.35074275732040405, 0.3716704547405243, 0.22165875136852264, 0.23465465009212494, 0.3007154166698456, 0.2748212516307831], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "ramp", "building", "building"]}, {"id": "VS_chart_0_3_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_18_1_0", "boxes": [[0, 1, 945, 687], [31, 127, 941, 574], [764, 418, 805, 485], [761, 417, 879, 494], [827, 427, 878, 492], [678, 471, 760, 557]], "scores": [0.5857235789299011, 0.30128636956214905, 0.24327300488948822, 0.3625192642211914, 0.2163078486919403, 0.35966217517852783], "labels": ["screenshot", "screenshot", "game controller", "game controller", "game controller", "game controller"]}, {"id": "VS_table_2_8_2_1", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_0_2_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_14_1_0", "boxes": [[87, 43, 243, 254]], "scores": [0.8633447289466858], "labels": ["triangle"]}, {"id": "VD_math_2_10_1_1", "boxes": [[266, 1, 298, 40], [3, -1, 625, 482], [76, 66, 551, 423]], "scores": [0.30672135949134827, 0.30381301045417786, 0.7188193798065186], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_2_1_1", "boxes": [[303, 45, 385, 100], [2, 2, 480, 325], [61, 69, 122, 125], [0, 69, 139, 268], [306, 88, 364, 102], [2, 68, 124, 212], [1, 70, 164, 275], [260, 106, 384, 286], [373, 80, 457, 242], [161, 98, 253, 254], [34, 115, 168, 275]], "scores": [0.6177960634231567, 0.526031494140625, 0.3823704421520233, 0.20026849210262299, 0.4416334331035614, 0.520195722579956, 0.5270238518714905, 0.6718800663948059, 0.6114940047264099, 0.6317269206047058, 0.5088797211647034], "labels": ["duckling", "grass", "duckling", "duckling", "duckling", "duckling", "duckling", "duck", "duckling", "duckling", "duckling"]}, {"id": "VD_video_1_8_0_3", "boxes": [[13, 16, 95, 72], [356, 15, 425, 92], [357, 15, 457, 95], [711, 17, 785, 109], [713, 17, 803, 109], [255, 80, 281, 197], [12, 75, 84, 197], [357, 13, 464, 200], [261, 78, 335, 200], [356, 116, 427, 199], [598, 118, 685, 200], [783, 137, 814, 197], [427, 122, 463, 197], [713, 135, 781, 199], [959, 137, 1036, 197]], "scores": [0.26973050832748413, 0.21510468423366547, 0.36725762486457825, 0.22973497211933136, 0.31349706649780273, 0.22492137551307678, 0.3044678568840027, 0.2011730819940567, 0.36290812492370605, 0.35074275732040405, 0.3716704547405243, 0.22165875136852264, 0.23465465009212494, 0.3007154166698456, 0.2748212516307831], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "ramp", "building", "building"]}, {"id": "VS_chart_0_3_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_18_1_1", "boxes": [[0, 1, 945, 687], [31, 127, 941, 574], [764, 418, 805, 485], [761, 417, 879, 494], [827, 427, 878, 492], [678, 471, 760, 557]], "scores": [0.5857235789299011, 0.30128636956214905, 0.24327300488948822, 0.3625192642211914, 0.2163078486919403, 0.35966217517852783], "labels": ["screenshot", "screenshot", "game controller", "game controller", "game controller", "game controller"]}, {"id": "VS_table_2_8_2_2", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_1_2_1_0", "boxes": [[282, 29, 325, 101], [382, 28, 429, 101], [483, 30, 525, 101], [889, 28, 938, 102], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 944, 104], [4, 31, 977, 353], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [19, 274, 933, 353]], "scores": [0.2585195004940033, 0.2103777676820755, 0.2343512326478958, 0.3306387662887573, 0.21496163308620453, 0.31442132592201233, 0.28440946340560913, 0.40058448910713196, 0.21848155558109283, 0.2031724452972412, 0.2037210315465927, 0.3259463310241699, 0.20116612315177917, 0.22098985314369202, 0.284860223531723, 0.22434180974960327, 0.20090587437152863, 0.27975037693977356, 0.23013141751289368, 0.20377209782600403], "labels": ["number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_2_14_1_1", "boxes": [[87, 43, 243, 254]], "scores": [0.8633447289466858], "labels": ["triangle"]}, {"id": "VD_math_2_10_1_2", "boxes": [[266, 1, 298, 40], [3, -1, 625, 482], [76, 66, 551, 423]], "scores": [0.30672135949134827, 0.30381301045417786, 0.7188193798065186], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_3_0_0", "boxes": [[58, 0, 266, 157], [34, 1, 266, 222], [15, 4, 268, 358], [136, 151, 188, 207], [19, 78, 196, 356], [19, 87, 181, 353], [40, 118, 156, 344], [47, 154, 146, 341]], "scores": [0.2299436628818512, 0.3248644173145294, 0.5008465051651001, 0.2739837169647217, 0.22227686643600464, 0.51600581407547, 0.28531667590141296, 0.2798604667186737], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_2_8_1_0", "boxes": [[5, 14, 75, 104], [355, 13, 425, 88], [723, 11, 803, 63], [5, 14, 107, 108], [354, 12, 451, 94], [597, 115, 623, 195], [724, 72, 793, 195], [971, 74, 1047, 196], [5, 131, 73, 196], [249, 132, 330, 195], [354, 113, 424, 195], [595, 115, 679, 196], [77, 136, 110, 194], [2, 15, 1035, 207]], "scores": [0.2769985496997833, 0.24290141463279724, 0.2899723947048187, 0.36094900965690613, 0.3567717969417572, 0.21451158821582794, 0.3675132095813751, 0.3254251480102539, 0.26336470246315, 0.29331937432289124, 0.305667519569397, 0.3404632806777954, 0.23473724722862244, 0.22223351895809174], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "photo"]}, {"id": "VS_chart_2_3_1_0", "boxes": [[-2, 2, 501, 443], [225, 157, 421, 363], [111, 420, 121, 434]], "scores": [0.5694429278373718, 0.28417786955833435, 0.37050172686576843], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_2_18_2_0", "boxes": [[-1, 2, 945, 686], [31, 128, 940, 572], [764, 418, 805, 485], [761, 417, 878, 494], [827, 427, 878, 493], [678, 471, 760, 557]], "scores": [0.5930556058883667, 0.24437950551509857, 0.24721837043762207, 0.3727889060974121, 0.21899890899658203, 0.36721858382225037], "labels": ["screenshot", "screenshot", "game controller", "game controller", "game controller", "game controller"]}, {"id": "VS_table_2_8_2_3", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_1_2_1_1", "boxes": [[282, 29, 325, 101], [382, 28, 429, 101], [483, 30, 525, 101], [889, 28, 938, 102], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 944, 104], [4, 31, 977, 353], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [19, 274, 933, 353]], "scores": [0.2585195004940033, 0.2103777676820755, 0.2343512326478958, 0.3306387662887573, 0.21496163308620453, 0.31442132592201233, 0.28440946340560913, 0.40058448910713196, 0.21848155558109283, 0.2031724452972412, 0.2037210315465927, 0.3259463310241699, 0.20116612315177917, 0.22098985314369202, 0.284860223531723, 0.22434180974960327, 0.20090587437152863, 0.27975037693977356, 0.23013141751289368, 0.20377209782600403], "labels": ["number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_2_14_1_2", "boxes": [[87, 43, 243, 254]], "scores": [0.8633447289466858], "labels": ["triangle"]}, {"id": "VD_math_1_11_0_0", "boxes": [[245, 1, 277, 40], [0, 0, 598, 496], [56, 65, 530, 423]], "scores": [0.29713544249534607, 0.31079012155532837, 0.7163565754890442], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_3_0_1", "boxes": [[58, 0, 266, 157], [34, 1, 266, 222], [15, 4, 268, 358], [136, 151, 188, 207], [19, 78, 196, 356], [19, 87, 181, 353], [40, 118, 156, 344], [47, 154, 146, 341]], "scores": [0.2299436628818512, 0.3248644173145294, 0.5008465051651001, 0.2739837169647217, 0.22227686643600464, 0.51600581407547, 0.28531667590141296, 0.2798604667186737], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_2_8_1_1", "boxes": [[5, 14, 75, 104], [355, 13, 425, 88], [723, 11, 803, 63], [5, 14, 107, 108], [354, 12, 451, 94], [597, 115, 623, 195], [724, 72, 793, 195], [971, 74, 1047, 196], [5, 131, 73, 196], [249, 132, 330, 195], [354, 113, 424, 195], [595, 115, 679, 196], [77, 136, 110, 194], [2, 15, 1035, 207]], "scores": [0.2769985496997833, 0.24290141463279724, 0.2899723947048187, 0.36094900965690613, 0.3567717969417572, 0.21451158821582794, 0.3675132095813751, 0.3254251480102539, 0.26336470246315, 0.29331937432289124, 0.305667519569397, 0.3404632806777954, 0.23473724722862244, 0.22223351895809174], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "photo"]}, {"id": "VS_chart_2_3_1_1", "boxes": [[-2, 2, 501, 443], [225, 157, 421, 363], [111, 420, 121, 434]], "scores": [0.5694429278373718, 0.28417786955833435, 0.37050172686576843], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_2_18_2_1", "boxes": [[-1, 2, 945, 686], [31, 128, 940, 572], [764, 418, 805, 485], [761, 417, 878, 494], [827, 427, 878, 493], [678, 471, 760, 557]], "scores": [0.5930556058883667, 0.24437950551509857, 0.24721837043762207, 0.3727889060974121, 0.21899890899658203, 0.36721858382225037], "labels": ["screenshot", "screenshot", "game controller", "game controller", "game controller", "game controller"]}, {"id": "VS_table_2_8_2_4", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_1_2_1_2", "boxes": [[282, 29, 325, 101], [382, 28, 429, 101], [483, 30, 525, 101], [889, 28, 938, 102], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 944, 104], [4, 31, 977, 353], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [19, 274, 933, 353]], "scores": [0.2585195004940033, 0.2103777676820755, 0.2343512326478958, 0.3306387662887573, 0.21496163308620453, 0.31442132592201233, 0.28440946340560913, 0.40058448910713196, 0.21848155558109283, 0.2031724452972412, 0.2037210315465927, 0.3259463310241699, 0.20116612315177917, 0.22098985314369202, 0.284860223531723, 0.22434180974960327, 0.20090587437152863, 0.27975037693977356, 0.23013141751289368, 0.20377209782600403], "labels": ["number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_2_14_1_3", "boxes": [[87, 43, 243, 254]], "scores": [0.8633447289466858], "labels": ["triangle"]}, {"id": "VD_math_1_11_0_1", "boxes": [[245, 1, 277, 40], [0, 0, 598, 496], [56, 65, 530, 423]], "scores": [0.29713544249534607, 0.31079012155532837, 0.7163565754890442], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_3_1_0", "boxes": [[62, 25, 293, 210], [55, 26, 294, 238], [0, 3, 306, 410], [32, 29, 296, 396], [50, 29, 295, 372], [141, 174, 225, 292], [32, 110, 199, 394], [56, 150, 191, 387]], "scores": [0.23514661192893982, 0.24976786971092224, 0.35570529103279114, 0.46060892939567566, 0.20929232239723206, 0.27591630816459656, 0.46670427918434143, 0.40971639752388], "labels": ["cartoon character", "cartoon character", "cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_2_8_1_2", "boxes": [[5, 14, 75, 104], [355, 13, 425, 88], [723, 11, 803, 63], [5, 14, 107, 108], [354, 12, 451, 94], [597, 115, 623, 195], [724, 72, 793, 195], [971, 74, 1047, 196], [5, 131, 73, 196], [249, 132, 330, 195], [354, 113, 424, 195], [595, 115, 679, 196], [77, 136, 110, 194], [2, 15, 1035, 207]], "scores": [0.2769985496997833, 0.24290141463279724, 0.2899723947048187, 0.36094900965690613, 0.3567717969417572, 0.21451158821582794, 0.3675132095813751, 0.3254251480102539, 0.26336470246315, 0.29331937432289124, 0.305667519569397, 0.3404632806777954, 0.23473724722862244, 0.22223351895809174], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "photo"]}, {"id": "VS_chart_2_3_1_2", "boxes": [[-2, 2, 501, 443], [225, 157, 421, 363], [111, 420, 121, 434]], "scores": [0.5694429278373718, 0.28417786955833435, 0.37050172686576843], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_0_19_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_8_2_5", "boxes": [[78, 60, 1131, 93], [81, 147, 1104, 538], [878, 377, 894, 400], [899, 377, 917, 401], [112, 426, 1089, 446], [919, 473, 937, 496], [84, 461, 1108, 537]], "scores": [0.3600807785987854, 0.2330840677022934, 0.2023194134235382, 0.20546293258666992, 0.21124759316444397, 0.20363526046276093, 0.20465746521949768], "labels": ["text", "text", "number", "number", "line", "number", "line"]}, {"id": "VS_ocr_2_2_2_0", "boxes": [[227, 30, 278, 101], [281, 28, 329, 99], [382, 28, 429, 100], [483, 30, 525, 101], [638, 28, 681, 100], [790, 28, 835, 101], [889, 28, 938, 102], [434, 29, 479, 101], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 945, 104], [1, -1, 987, 392], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [5, 29, 981, 352], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [18, 274, 933, 353]], "scores": [0.21343985199928284, 0.23763051629066467, 0.22559623420238495, 0.2497144639492035, 0.20038855075836182, 0.20202255249023438, 0.33682388067245483, 0.20485924184322357, 0.22460147738456726, 0.32927483320236206, 0.2857237160205841, 0.20484255254268646, 0.22347962856292725, 0.2028236985206604, 0.20482750236988068, 0.325677752494812, 0.20452600717544556, 0.2276700884103775, 0.2887337803840637, 0.39784568548202515, 0.22831577062606812, 0.2019687294960022, 0.2836439609527588, 0.23299115896224976, 0.20774157345294952], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_1_15_0_0", "boxes": [[-2, -1, 682, 460], [482, 10, 596, 178], [102, 199, 262, 419]], "scores": [0.25381553173065186, 0.3678838312625885, 0.4372887909412384], "labels": ["color", "pyramid", "pyramid"]}, {"id": "VD_math_1_11_0_2", "boxes": [[245, 1, 277, 40], [0, 0, 598, 496], [56, 65, 530, 423]], "scores": [0.29713544249534607, 0.31079012155532837, 0.7163565754890442], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_3_1_1", "boxes": [[62, 25, 293, 210], [55, 26, 294, 238], [0, 3, 306, 410], [32, 29, 296, 396], [50, 29, 295, 372], [141, 174, 225, 292], [32, 110, 199, 394], [56, 150, 191, 387]], "scores": [0.23514661192893982, 0.24976786971092224, 0.35570529103279114, 0.46060892939567566, 0.20929232239723206, 0.27591630816459656, 0.46670427918434143, 0.40971639752388], "labels": ["cartoon character", "cartoon character", "cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_2_8_1_3", "boxes": [[5, 14, 75, 104], [355, 13, 425, 88], [723, 11, 803, 63], [5, 14, 107, 108], [354, 12, 451, 94], [597, 115, 623, 195], [724, 72, 793, 195], [971, 74, 1047, 196], [5, 131, 73, 196], [249, 132, 330, 195], [354, 113, 424, 195], [595, 115, 679, 196], [77, 136, 110, 194], [2, 15, 1035, 207]], "scores": [0.2769985496997833, 0.24290141463279724, 0.2899723947048187, 0.36094900965690613, 0.3567717969417572, 0.21451158821582794, 0.3675132095813751, 0.3254251480102539, 0.26336470246315, 0.29331937432289124, 0.305667519569397, 0.3404632806777954, 0.23473724722862244, 0.22223351895809174], "labels": ["building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "building", "ramp", "photo"]}, {"id": "VS_chart_2_3_1_3", "boxes": [[-2, 2, 501, 443], [225, 157, 421, 363], [111, 420, 121, 434]], "scores": [0.5694429278373718, 0.28417786955833435, 0.37050172686576843], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_0_19_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_8_3_0", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_2_2_2_1", "boxes": [[227, 30, 278, 101], [281, 28, 329, 99], [382, 28, 429, 100], [483, 30, 525, 101], [638, 28, 681, 100], [790, 28, 835, 101], [889, 28, 938, 102], [434, 29, 479, 101], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 945, 104], [1, -1, 987, 392], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [5, 29, 981, 352], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [18, 274, 933, 353]], "scores": [0.21343985199928284, 0.23763051629066467, 0.22559623420238495, 0.2497144639492035, 0.20038855075836182, 0.20202255249023438, 0.33682388067245483, 0.20485924184322357, 0.22460147738456726, 0.32927483320236206, 0.2857237160205841, 0.20484255254268646, 0.22347962856292725, 0.2028236985206604, 0.20482750236988068, 0.325677752494812, 0.20452600717544556, 0.2276700884103775, 0.2887337803840637, 0.39784568548202515, 0.22831577062606812, 0.2019687294960022, 0.2836439609527588, 0.23299115896224976, 0.20774157345294952], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_1_15_0_1", "boxes": [[-2, -1, 682, 460], [482, 10, 596, 178], [102, 199, 262, 419]], "scores": [0.25381553173065186, 0.3678838312625885, 0.4372887909412384], "labels": ["color", "pyramid", "pyramid"]}, {"id": "VD_math_2_11_1_0", "boxes": [[258, 2, 290, 41], [2, -1, 611, 489], [69, 66, 544, 424]], "scores": [0.29983043670654297, 0.2971884310245514, 0.7009137868881226], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_4_0_0", "boxes": [[0, 1, 308, 223], [90, 56, 169, 95], [147, 74, 208, 115], [57, 72, 142, 109], [46, 101, 123, 145], [49, 54, 288, 187], [49, 57, 211, 151], [166, 129, 219, 179], [236, 138, 289, 187]], "scores": [0.41498276591300964, 0.6075334548950195, 0.562379777431488, 0.5894719362258911, 0.610741913318634, 0.20246055722236633, 0.2245146632194519, 0.5473758578300476, 0.5277127027511597], "labels": ["calm", "duck", "duck", "duck", "duck", "flock", "flock", "duck", "duck"]}, {"id": "VD_video_1_9_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_3_2_0", "boxes": [[-1, -1, 1144, 997], [515, 350, 965, 819], [255, 951, 280, 983]], "scores": [0.6880002021789551, 0.24716611206531525, 0.2946114242076874], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_1_19_1_0", "boxes": [[4, 3, 663, 373], [25, 72, 650, 332], [7, 2, 663, 818], [9, 380, 662, 831], [27, 417, 648, 650], [15, 398, 657, 815]], "scores": [0.20745989680290222, 0.4405219852924347, 0.2347988784313202, 0.21149662137031555, 0.37292513251304626, 0.20966443419456482], "labels": ["graph", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_table_2_8_3_1", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_2_2_2_2", "boxes": [[227, 30, 278, 101], [281, 28, 329, 99], [382, 28, 429, 100], [483, 30, 525, 101], [638, 28, 681, 100], [790, 28, 835, 101], [889, 28, 938, 102], [434, 29, 479, 101], [686, 29, 732, 102], [152, 30, 202, 102], [14, 26, 945, 104], [1, -1, 987, 392], [68, 151, 114, 224], [682, 151, 725, 224], [782, 151, 828, 224], [934, 151, 982, 223], [523, 152, 571, 224], [630, 153, 671, 225], [20, 153, 982, 228], [5, 29, 981, 352], [322, 276, 369, 346], [835, 274, 877, 348], [14, 277, 64, 348], [67, 275, 111, 347], [18, 274, 933, 353]], "scores": [0.21343985199928284, 0.23763051629066467, 0.22559623420238495, 0.2497144639492035, 0.20038855075836182, 0.20202255249023438, 0.33682388067245483, 0.20485924184322357, 0.22460147738456726, 0.32927483320236206, 0.2857237160205841, 0.20484255254268646, 0.22347962856292725, 0.2028236985206604, 0.20482750236988068, 0.325677752494812, 0.20452600717544556, 0.2276700884103775, 0.2887337803840637, 0.39784568548202515, 0.22831577062606812, 0.2019687294960022, 0.2836439609527588, 0.23299115896224976, 0.20774157345294952], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "number", "number", "font", "font", "number", "number", "number", "number", "font"]}, {"id": "VD_illusion_2_15_1_0", "boxes": [[489, 14, 602, 181], [-2, 0, 699, 462], [108, 204, 270, 423]], "scores": [0.3076629936695099, 0.23174701631069183, 0.46471887826919556], "labels": ["pyramid", "color", "triangle"]}, {"id": "VD_math_2_11_1_1", "boxes": [[258, 2, 290, 41], [2, -1, 611, 489], [69, 66, 544, 424]], "scores": [0.29983043670654297, 0.2971884310245514, 0.7009137868881226], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_4_0_1", "boxes": [[0, 1, 308, 223], [90, 56, 169, 95], [147, 74, 208, 115], [57, 72, 142, 109], [46, 101, 123, 145], [49, 54, 288, 187], [49, 57, 211, 151], [166, 129, 219, 179], [236, 138, 289, 187]], "scores": [0.41498276591300964, 0.6075334548950195, 0.562379777431488, 0.5894719362258911, 0.610741913318634, 0.20246055722236633, 0.2245146632194519, 0.5473758578300476, 0.5277127027511597], "labels": ["calm", "duck", "duck", "duck", "duck", "flock", "flock", "duck", "duck"]}, {"id": "VD_video_1_9_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_3_2_1", "boxes": [[-1, -1, 1144, 997], [515, 350, 965, 819], [255, 951, 280, 983]], "scores": [0.6880002021789551, 0.24716611206531525, 0.2946114242076874], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_1_19_1_1", "boxes": [[4, 3, 663, 373], [25, 72, 650, 332], [7, 2, 663, 818], [9, 380, 662, 831], [27, 417, 648, 650], [15, 398, 657, 815]], "scores": [0.20745989680290222, 0.4405219852924347, 0.2347988784313202, 0.21149662137031555, 0.37292513251304626, 0.20966443419456482], "labels": ["graph", "graph", "graph", "graph", "graph", "graph"]}, {"id": "VS_table_2_8_3_2", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_0_3_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_15_1_1", "boxes": [[489, 14, 602, 181], [-2, 0, 699, 462], [108, 204, 270, 423]], "scores": [0.3076629936695099, 0.23174701631069183, 0.46471887826919556], "labels": ["pyramid", "color", "triangle"]}, {"id": "VD_math_2_11_1_2", "boxes": [[258, 2, 290, 41], [2, -1, 611, 489], [69, 66, 544, 424]], "scores": [0.29983043670654297, 0.2971884310245514, 0.7009137868881226], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_4_1_0", "boxes": [[0, 2, 408, 300], [120, 75, 227, 128], [195, 100, 278, 155], [74, 96, 188, 151], [60, 135, 166, 194], [172, 168, 310, 250], [316, 186, 387, 252], [191, 176, 304, 240], [64, 194, 161, 236], [247, 211, 299, 240], [192, 234, 296, 253]], "scores": [0.4610275328159332, 0.6351754069328308, 0.5870119333267212, 0.6210136413574219, 0.6445309519767761, 0.22338426113128662, 0.555915355682373, 0.6643628478050232, 0.27269673347473145, 0.22624951601028442, 0.3667801022529602], "labels": ["water", "duck", "duck", "duck", "duck", "pond", "duck", "duck", "duckling", "duckling", "duckling"]}, {"id": "VD_video_1_9_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_3_2_2", "boxes": [[-1, -1, 1144, 997], [515, 350, 965, 819], [255, 951, 280, 983]], "scores": [0.6880002021789551, 0.24716611206531525, 0.2946114242076874], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_2_19_2_0", "boxes": [[13, 57, 580, 291], [4, 0, 580, 733], [14, 372, 575, 580]], "scores": [0.45911139249801636, 0.2462143898010254, 0.40626323223114014], "labels": ["graph", "graph", "graph"]}, {"id": "VS_table_2_8_3_3", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_0_3_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_16_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_12_0_0", "boxes": [[277, 16, 309, 55], [0, 1, 659, 500], [193, 89, 330, 309], [89, 81, 564, 442]], "scores": [0.2952952980995178, 0.29689767956733704, 0.20381969213485718, 0.6956818103790283], "labels": ["triangle", "angle", "triangle", "triangle"]}, {"id": "VD_figure_2_4_1_1", "boxes": [[0, 2, 408, 300], [120, 75, 227, 128], [195, 100, 278, 155], [74, 96, 188, 151], [60, 135, 166, 194], [172, 168, 310, 250], [316, 186, 387, 252], [191, 176, 304, 240], [64, 194, 161, 236], [247, 211, 299, 240], [192, 234, 296, 253]], "scores": [0.4610275328159332, 0.6351754069328308, 0.5870119333267212, 0.6210136413574219, 0.6445309519767761, 0.22338426113128662, 0.555915355682373, 0.6643628478050232, 0.27269673347473145, 0.22624951601028442, 0.3667801022529602], "labels": ["water", "duck", "duck", "duck", "duck", "pond", "duck", "duck", "duckling", "duckling", "duckling"]}, {"id": "VD_video_2_9_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_3_2_3", "boxes": [[-1, -1, 1144, 997], [515, 350, 965, 819], [255, 951, 280, 983]], "scores": [0.6880002021789551, 0.24716611206531525, 0.2946114242076874], "labels": ["graph", "graph", "number"]}, {"id": "VS_chart_2_19_2_1", "boxes": [[13, 57, 580, 291], [4, 0, 580, 733], [14, 372, 575, 580]], "scores": [0.45911139249801636, 0.2462143898010254, 0.40626323223114014], "labels": ["graph", "graph", "graph"]}, {"id": "VS_table_2_8_3_4", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_0_3_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_16_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_12_0_1", "boxes": [[277, 16, 309, 55], [0, 1, 659, 500], [193, 89, 330, 309], [89, 81, 564, 442]], "scores": [0.2952952980995178, 0.29689767956733704, 0.20381969213485718, 0.6956818103790283], "labels": ["triangle", "angle", "triangle", "triangle"]}, {"id": "VD_figure_1_5_0_0", "boxes": [[42, 42, 118, 279], [41, 52, 596, 289], [116, 61, 169, 274], [176, 71, 205, 274], [306, 122, 353, 275], [406, 130, 428, 277], [219, 86, 247, 276], [262, 104, 289, 276], [368, 122, 390, 276], [443, 146, 479, 277], [491, 137, 515, 278], [525, 158, 544, 276], [565, 159, 588, 278], [18, 295, 644, 311], [19, 292, 645, 313]], "scores": [0.5647631883621216, 0.2430983930826187, 0.5599377155303955, 0.33786213397979736, 0.5420149564743042, 0.4949575662612915, 0.49088993668556213, 0.4853684902191162, 0.5097395777702332, 0.4033023416996002, 0.5559050440788269, 0.5114025473594666, 0.5578174591064453, 0.23234906792640686, 0.6154225468635559], "labels": ["hammer", "tool", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "ruler", "ruler"]}, {"id": "VD_video_2_9_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_4_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_20_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_8_3_5", "boxes": [[110, 74, 1158, 107], [127, 252, 1117, 273], [111, 161, 1131, 552], [906, 486, 917, 509], [112, 471, 1130, 549]], "scores": [0.35956132411956787, 0.2269483357667923, 0.2585640251636505, 0.23601560294628143, 0.20489856600761414], "labels": ["text", "line", "text", "number", "line"]}, {"id": "VS_ocr_1_3_1_0", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [127, 129, 155, 181], [203, 128, 237, 180], [246, 128, 274, 181], [25, 129, 54, 181], [83, 129, 118, 179], [162, 130, 196, 180], [9, 30, 287, 428], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 163, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 287, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 125, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.24305878579616547, 0.369390606880188, 0.3416445255279541, 0.2652149796485901, 0.3357091546058655, 0.3787643015384674, 0.326435387134552, 0.32467836141586304, 0.43446511030197144, 0.27442434430122375, 0.20771633088588715, 0.2357201874256134, 0.24885401129722595, 0.2613590657711029, 0.25927019119262695, 0.20610323548316956, 0.21544493734836578, 0.2237813025712967, 0.23001869022846222, 0.20728859305381775, 0.24771104753017426, 0.3840133845806122, 0.22992052137851715, 0.20010508596897125], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_2_16_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_12_0_2", "boxes": [[277, 16, 309, 55], [0, 1, 659, 500], [193, 89, 330, 309], [89, 81, 564, 442]], "scores": [0.2952952980995178, 0.29689767956733704, 0.20381969213485718, 0.6956818103790283], "labels": ["triangle", "angle", "triangle", "triangle"]}, {"id": "VD_figure_1_5_0_1", "boxes": [[42, 42, 118, 279], [41, 52, 596, 289], [116, 61, 169, 274], [176, 71, 205, 274], [306, 122, 353, 275], [406, 130, 428, 277], [219, 86, 247, 276], [262, 104, 289, 276], [368, 122, 390, 276], [443, 146, 479, 277], [491, 137, 515, 278], [525, 158, 544, 276], [565, 159, 588, 278], [18, 295, 644, 311], [19, 292, 645, 313]], "scores": [0.5647631883621216, 0.2430983930826187, 0.5599377155303955, 0.33786213397979736, 0.5420149564743042, 0.4949575662612915, 0.49088993668556213, 0.4853684902191162, 0.5097395777702332, 0.4033023416996002, 0.5559050440788269, 0.5114025473594666, 0.5578174591064453, 0.23234906792640686, 0.6154225468635559], "labels": ["hammer", "tool", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "ruler", "ruler"]}, {"id": "VD_video_2_9_1_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_4_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_20_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_9_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_3_1_1", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [127, 129, 155, 181], [203, 128, 237, 180], [246, 128, 274, 181], [25, 129, 54, 181], [83, 129, 118, 179], [162, 130, 196, 180], [9, 30, 287, 428], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 163, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 287, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 125, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.24305878579616547, 0.369390606880188, 0.3416445255279541, 0.2652149796485901, 0.3357091546058655, 0.3787643015384674, 0.326435387134552, 0.32467836141586304, 0.43446511030197144, 0.27442434430122375, 0.20771633088588715, 0.2357201874256134, 0.24885401129722595, 0.2613590657711029, 0.25927019119262695, 0.20610323548316956, 0.21544493734836578, 0.2237813025712967, 0.23001869022846222, 0.20728859305381775, 0.24771104753017426, 0.3840133845806122, 0.22992052137851715, 0.20010508596897125], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_2_16_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_12_1_0", "boxes": [[247, 0, 278, 39], [0, 1, 605, 526], [161, 70, 302, 298], [270, 69, 406, 296], [56, 65, 534, 423], [57, 417, 545, 423]], "scores": [0.2825113832950592, 0.32789385318756104, 0.20947694778442383, 0.20034880936145782, 0.684700071811676, 0.20460619032382965], "labels": ["triangle", "angle", "triangle", "triangle", "triangle", "line"]}, {"id": "VD_figure_2_5_1_0", "boxes": [[40, 39, 118, 284], [116, 59, 170, 280], [176, 66, 207, 279], [311, 122, 360, 281], [415, 130, 436, 284], [223, 85, 251, 281], [267, 104, 294, 281], [178, 172, 209, 280], [312, 171, 337, 281], [336, 172, 358, 277], [376, 123, 399, 282], [454, 147, 491, 283], [501, 137, 527, 283], [545, 161, 557, 279], [539, 160, 558, 280], [579, 159, 603, 284], [222, 196, 252, 283], [265, 196, 295, 284], [376, 213, 399, 284], [502, 207, 528, 284], [578, 213, 603, 285], [344, 195, 361, 282], [20, 298, 661, 317]], "scores": [0.5407125949859619, 0.5360004901885986, 0.4007730484008789, 0.4939378499984741, 0.43601760268211365, 0.45991769433021545, 0.4491517245769501, 0.3136873245239258, 0.23304925858974457, 0.2889191210269928, 0.4729636311531067, 0.3409607410430908, 0.5050593614578247, 0.2762613296508789, 0.45202013850212097, 0.497055321931839, 0.3934653699398041, 0.3809269964694977, 0.3608913719654083, 0.2858668565750122, 0.3197104036808014, 0.26585274934768677, 0.5676533579826355], "labels": ["hammer", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "handle", "handle", "handle", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "handle", "handle", "handle", "handle", "handle", "handle", "ruler"]}, {"id": "VD_video_1_10_0_0", "boxes": [[3532, 40, 3628, 118], [3, 16, 921, 387], [1962, 19, 2344, 382], [2172, 20, 3719, 382], [2805, 27, 3072, 333], [118, 27, 509, 386], [196, 94, 447, 335], [24, 14, 1853, 390], [1040, 24, 1433, 395], [1129, 92, 1372, 332], [943, 12, 1857, 391], [2039, 70, 2286, 290], [1902, 10, 2631, 377], [2797, 89, 2996, 300], [2746, 12, 3747, 378], [498, 104, 844, 446], [2795, 105, 2961, 296], [3400, 106, 3715, 417], [1845, 141, 2292, 435], [1198, 182, 1531, 413], [1468, 155, 1806, 436], [2440, 103, 2683, 417], [2792, 220, 3115, 460], [0, 199, 76, 437], [324, 179, 558, 406], [933, 202, 1029, 448], [2348, 99, 2678, 420], [1214, 158, 1803, 443], [3437, 31, 3718, 458]], "scores": [0.20124082267284393, 0.34967657923698425, 0.5101414918899536, 0.21324066817760468, 0.44689685106277466, 0.5181186199188232, 0.35462453961372375, 0.33214935660362244, 0.5338215231895447, 0.333118200302124, 0.33881404995918274, 0.3134152591228485, 0.3131914734840393, 0.22239325940608978, 0.3398140072822571, 0.4673309922218323, 0.23734073340892792, 0.4492887258529663, 0.4287789762020111, 0.23541347682476044, 0.3250073194503784, 0.2296696901321411, 0.38236263394355774, 0.30775004625320435, 0.4005674719810486, 0.4587398171424866, 0.412052184343338, 0.31756237149238586, 0.22987152636051178], "labels": ["animal", "car", "tire", "car", "tire", "tire", "rim", "car", "tire", "rim", "car", "rim", "car", "rim", "car", "animal", "rim", "wheel", "animal", "animal", "animal", "animal", "animal", "animal", "rim", "animal", "animal", "animal", "animal"]}, {"id": "VS_chart_0_4_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_20_1_0", "boxes": [[2, 3, 804, 817], [256, 151, 576, 794], [17, 799, 803, 820]], "scores": [0.49213579297065735, 0.4755053222179413, 0.201299786567688], "labels": ["graph", "pyramid", "graph"]}, {"id": "VS_table_0_9_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_3_1_2", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [127, 129, 155, 181], [203, 128, 237, 180], [246, 128, 274, 181], [25, 129, 54, 181], [83, 129, 118, 179], [162, 130, 196, 180], [9, 30, 287, 428], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 163, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 287, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 125, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.24305878579616547, 0.369390606880188, 0.3416445255279541, 0.2652149796485901, 0.3357091546058655, 0.3787643015384674, 0.326435387134552, 0.32467836141586304, 0.43446511030197144, 0.27442434430122375, 0.20771633088588715, 0.2357201874256134, 0.24885401129722595, 0.2613590657711029, 0.25927019119262695, 0.20610323548316956, 0.21544493734836578, 0.2237813025712967, 0.23001869022846222, 0.20728859305381775, 0.24771104753017426, 0.3840133845806122, 0.22992052137851715, 0.20010508596897125], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_1_17_0_0", "boxes": [[476, 185, 820, 676], [491, 202, 570, 591], [43, 44, 2959, 1412], [1209, 329, 1386, 944], [91, 602, 2919, 1386], [2190, 700, 2367, 1306], [23, -8, 3007, 1466]], "scores": [0.223448246717453, 0.21855176985263824, 0.34737104177474976, 0.39385464787483215, 0.3176874816417694, 0.36102741956710815, 0.35591498017311096], "labels": ["doorway", "doorway", "illustration", "businessman", "corridor", "businessman", "illustration"]}, {"id": "VD_math_2_12_1_1", "boxes": [[247, 0, 278, 39], [0, 1, 605, 526], [161, 70, 302, 298], [270, 69, 406, 296], [56, 65, 534, 423], [57, 417, 545, 423]], "scores": [0.2825113832950592, 0.32789385318756104, 0.20947694778442383, 0.20034880936145782, 0.684700071811676, 0.20460619032382965], "labels": ["triangle", "angle", "triangle", "triangle", "triangle", "line"]}, {"id": "VD_figure_2_5_1_1", "boxes": [[40, 39, 118, 284], [116, 59, 170, 280], [176, 66, 207, 279], [311, 122, 360, 281], [415, 130, 436, 284], [223, 85, 251, 281], [267, 104, 294, 281], [178, 172, 209, 280], [312, 171, 337, 281], [336, 172, 358, 277], [376, 123, 399, 282], [454, 147, 491, 283], [501, 137, 527, 283], [545, 161, 557, 279], [539, 160, 558, 280], [579, 159, 603, 284], [222, 196, 252, 283], [265, 196, 295, 284], [376, 213, 399, 284], [502, 207, 528, 284], [578, 213, 603, 285], [344, 195, 361, 282], [20, 298, 661, 317]], "scores": [0.5407125949859619, 0.5360004901885986, 0.4007730484008789, 0.4939378499984741, 0.43601760268211365, 0.45991769433021545, 0.4491517245769501, 0.3136873245239258, 0.23304925858974457, 0.2889191210269928, 0.4729636311531067, 0.3409607410430908, 0.5050593614578247, 0.2762613296508789, 0.45202013850212097, 0.497055321931839, 0.3934653699398041, 0.3809269964694977, 0.3608913719654083, 0.2858668565750122, 0.3197104036808014, 0.26585274934768677, 0.5676533579826355], "labels": ["hammer", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "handle", "handle", "handle", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "handle", "handle", "handle", "handle", "handle", "handle", "ruler"]}, {"id": "VD_video_1_10_0_1", "boxes": [[3532, 40, 3628, 118], [3, 16, 921, 387], [1962, 19, 2344, 382], [2172, 20, 3719, 382], [2805, 27, 3072, 333], [118, 27, 509, 386], [196, 94, 447, 335], [24, 14, 1853, 390], [1040, 24, 1433, 395], [1129, 92, 1372, 332], [943, 12, 1857, 391], [2039, 70, 2286, 290], [1902, 10, 2631, 377], [2797, 89, 2996, 300], [2746, 12, 3747, 378], [498, 104, 844, 446], [2795, 105, 2961, 296], [3400, 106, 3715, 417], [1845, 141, 2292, 435], [1198, 182, 1531, 413], [1468, 155, 1806, 436], [2440, 103, 2683, 417], [2792, 220, 3115, 460], [0, 199, 76, 437], [324, 179, 558, 406], [933, 202, 1029, 448], [2348, 99, 2678, 420], [1214, 158, 1803, 443], [3437, 31, 3718, 458]], "scores": [0.20124082267284393, 0.34967657923698425, 0.5101414918899536, 0.21324066817760468, 0.44689685106277466, 0.5181186199188232, 0.35462453961372375, 0.33214935660362244, 0.5338215231895447, 0.333118200302124, 0.33881404995918274, 0.3134152591228485, 0.3131914734840393, 0.22239325940608978, 0.3398140072822571, 0.4673309922218323, 0.23734073340892792, 0.4492887258529663, 0.4287789762020111, 0.23541347682476044, 0.3250073194503784, 0.2296696901321411, 0.38236263394355774, 0.30775004625320435, 0.4005674719810486, 0.4587398171424866, 0.412052184343338, 0.31756237149238586, 0.22987152636051178], "labels": ["animal", "car", "tire", "car", "tire", "tire", "rim", "car", "tire", "rim", "car", "rim", "car", "rim", "car", "animal", "rim", "wheel", "animal", "animal", "animal", "animal", "animal", "animal", "rim", "animal", "animal", "animal", "animal"]}, {"id": "VS_chart_0_4_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_1_20_1_1", "boxes": [[2, 3, 804, 817], [256, 151, 576, 794], [17, 799, 803, 820]], "scores": [0.49213579297065735, 0.4755053222179413, 0.201299786567688], "labels": ["graph", "pyramid", "graph"]}, {"id": "VS_table_0_9_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_2_3_2_0", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [124, 129, 158, 181], [203, 128, 237, 180], [246, 129, 274, 181], [25, 129, 54, 182], [83, 129, 118, 180], [162, 130, 196, 180], [9, 29, 287, 430], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 162, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 286, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 117, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.23924598097801208, 0.36869123578071594, 0.2950306534767151, 0.2667487859725952, 0.34645533561706543, 0.38871830701828003, 0.33151569962501526, 0.32622411847114563, 0.425184428691864, 0.273041307926178, 0.20575374364852905, 0.23817847669124603, 0.2513320744037628, 0.26820284128189087, 0.26199138164520264, 0.20803511142730713, 0.21972747147083282, 0.22309410572052002, 0.2314242720603943, 0.20891375839710236, 0.24927937984466553, 0.4052715599536896, 0.230825737118721, 0.20033016800880432], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_1_17_0_1", "boxes": [[476, 185, 820, 676], [491, 202, 570, 591], [43, 44, 2959, 1412], [1209, 329, 1386, 944], [91, 602, 2919, 1386], [2190, 700, 2367, 1306], [23, -8, 3007, 1466]], "scores": [0.223448246717453, 0.21855176985263824, 0.34737104177474976, 0.39385464787483215, 0.3176874816417694, 0.36102741956710815, 0.35591498017311096], "labels": ["doorway", "doorway", "illustration", "businessman", "corridor", "businessman", "illustration"]}, {"id": "VD_math_2_12_1_2", "boxes": [[247, 0, 278, 39], [0, 1, 605, 526], [161, 70, 302, 298], [270, 69, 406, 296], [56, 65, 534, 423], [57, 417, 545, 423]], "scores": [0.2825113832950592, 0.32789385318756104, 0.20947694778442383, 0.20034880936145782, 0.684700071811676, 0.20460619032382965], "labels": ["triangle", "angle", "triangle", "triangle", "triangle", "line"]}, {"id": "VD_figure_2_5_2_0", "boxes": [[25, 32, 125, 166], [26, 31, 125, 286], [130, 63, 174, 280], [181, 67, 211, 278], [311, 125, 360, 280], [414, 132, 434, 282], [225, 87, 253, 280], [268, 105, 296, 280], [61, 164, 87, 283], [312, 172, 360, 279], [336, 173, 357, 274], [541, 162, 554, 278], [181, 171, 212, 278], [314, 172, 338, 277], [375, 124, 398, 281], [452, 147, 488, 281], [499, 138, 524, 282], [534, 161, 554, 279], [225, 195, 255, 281], [574, 161, 598, 282], [267, 196, 296, 282], [499, 207, 525, 283], [375, 213, 398, 282], [574, 213, 599, 282], [344, 192, 362, 280], [29, 297, 657, 318]], "scores": [0.28368431329727173, 0.40225136280059814, 0.4396582841873169, 0.41011425852775574, 0.5062651634216309, 0.4468039274215698, 0.47064241766929626, 0.46028250455856323, 0.5997320413589478, 0.21638716757297516, 0.28645509481430054, 0.2224733829498291, 0.3480870723724365, 0.2653454542160034, 0.48439520597457886, 0.36507338285446167, 0.523337185382843, 0.4761975407600403, 0.40690869092941284, 0.5209150314331055, 0.41597068309783936, 0.3011612892150879, 0.37476930022239685, 0.32769104838371277, 0.2622343599796295, 0.5573588013648987], "labels": ["parsley", "carrot", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "carrot", "handle", "handle", "screwdriver", "handle", "handle", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "handle", "screwdriver", "handle", "handle", "handle", "handle", "handle", "ruler"]}, {"id": "VD_video_1_10_0_2", "boxes": [[3532, 40, 3628, 118], [3, 16, 921, 387], [1962, 19, 2344, 382], [2172, 20, 3719, 382], [2805, 27, 3072, 333], [118, 27, 509, 386], [196, 94, 447, 335], [24, 14, 1853, 390], [1040, 24, 1433, 395], [1129, 92, 1372, 332], [943, 12, 1857, 391], [2039, 70, 2286, 290], [1902, 10, 2631, 377], [2797, 89, 2996, 300], [2746, 12, 3747, 378], [498, 104, 844, 446], [2795, 105, 2961, 296], [3400, 106, 3715, 417], [1845, 141, 2292, 435], [1198, 182, 1531, 413], [1468, 155, 1806, 436], [2440, 103, 2683, 417], [2792, 220, 3115, 460], [0, 199, 76, 437], [324, 179, 558, 406], [933, 202, 1029, 448], [2348, 99, 2678, 420], [1214, 158, 1803, 443], [3437, 31, 3718, 458]], "scores": [0.20124082267284393, 0.34967657923698425, 0.5101414918899536, 0.21324066817760468, 0.44689685106277466, 0.5181186199188232, 0.35462453961372375, 0.33214935660362244, 0.5338215231895447, 0.333118200302124, 0.33881404995918274, 0.3134152591228485, 0.3131914734840393, 0.22239325940608978, 0.3398140072822571, 0.4673309922218323, 0.23734073340892792, 0.4492887258529663, 0.4287789762020111, 0.23541347682476044, 0.3250073194503784, 0.2296696901321411, 0.38236263394355774, 0.30775004625320435, 0.4005674719810486, 0.4587398171424866, 0.412052184343338, 0.31756237149238586, 0.22987152636051178], "labels": ["animal", "car", "tire", "car", "tire", "tire", "rim", "car", "tire", "rim", "car", "rim", "car", "rim", "car", "animal", "rim", "wheel", "animal", "animal", "animal", "animal", "animal", "animal", "rim", "animal", "animal", "animal", "animal"]}, {"id": "VS_chart_2_4_1_0", "boxes": [[52, -8, 1748, 940]], "scores": [0.5943771600723267], "labels": ["graph"]}, {"id": "VS_chart_2_20_2_0", "boxes": [[3, 1, 731, 730], [236, 139, 528, 711], [12, 106, 48, 716], [29, 713, 726, 731]], "scores": [0.5249504446983337, 0.5160407423973083, 0.22812946140766144, 0.22057169675827026], "labels": ["graph", "pyramid", "graph", "graph"]}, {"id": "VS_table_0_9_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_2_3_2_1", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [124, 129, 158, 181], [203, 128, 237, 180], [246, 129, 274, 181], [25, 129, 54, 182], [83, 129, 118, 180], [162, 130, 196, 180], [9, 29, 287, 430], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 162, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 286, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 117, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.23924598097801208, 0.36869123578071594, 0.2950306534767151, 0.2667487859725952, 0.34645533561706543, 0.38871830701828003, 0.33151569962501526, 0.32622411847114563, 0.425184428691864, 0.273041307926178, 0.20575374364852905, 0.23817847669124603, 0.2513320744037628, 0.26820284128189087, 0.26199138164520264, 0.20803511142730713, 0.21972747147083282, 0.22309410572052002, 0.2314242720603943, 0.20891375839710236, 0.24927937984466553, 0.4052715599536896, 0.230825737118721, 0.20033016800880432], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_2_17_1_0", "boxes": [[28, 38, 2956, 1410], [471, 185, 821, 676], [491, 201, 570, 590], [1179, 150, 1409, 960], [91, 605, 2920, 1384], [2190, 700, 2368, 1306], [22, -5, 3006, 1469]], "scores": [0.3326476216316223, 0.2592281401157379, 0.2259562611579895, 0.393204003572464, 0.3082590401172638, 0.35869520902633667, 0.3214534819126129], "labels": ["illustration", "doorway", "doorway", "businessman", "corridor", "businessman", "illustration"]}, {"id": "VD_math_1_13_0_0", "boxes": [[465, 9, 496, 47], [0, 0, 549, 444], [56, 45, 508, 399]], "scores": [0.3073160648345947, 0.24848195910453796, 0.562919557094574], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_5_2_1", "boxes": [[25, 32, 125, 166], [26, 31, 125, 286], [130, 63, 174, 280], [181, 67, 211, 278], [311, 125, 360, 280], [414, 132, 434, 282], [225, 87, 253, 280], [268, 105, 296, 280], [61, 164, 87, 283], [312, 172, 360, 279], [336, 173, 357, 274], [541, 162, 554, 278], [181, 171, 212, 278], [314, 172, 338, 277], [375, 124, 398, 281], [452, 147, 488, 281], [499, 138, 524, 282], [534, 161, 554, 279], [225, 195, 255, 281], [574, 161, 598, 282], [267, 196, 296, 282], [499, 207, 525, 283], [375, 213, 398, 282], [574, 213, 599, 282], [344, 192, 362, 280], [29, 297, 657, 318]], "scores": [0.28368431329727173, 0.40225136280059814, 0.4396582841873169, 0.41011425852775574, 0.5062651634216309, 0.4468039274215698, 0.47064241766929626, 0.46028250455856323, 0.5997320413589478, 0.21638716757297516, 0.28645509481430054, 0.2224733829498291, 0.3480870723724365, 0.2653454542160034, 0.48439520597457886, 0.36507338285446167, 0.523337185382843, 0.4761975407600403, 0.40690869092941284, 0.5209150314331055, 0.41597068309783936, 0.3011612892150879, 0.37476930022239685, 0.32769104838371277, 0.2622343599796295, 0.5573588013648987], "labels": ["parsley", "carrot", "wrench", "screwdriver", "plier", "screwdriver", "screwdriver", "screwdriver", "carrot", "handle", "handle", "screwdriver", "handle", "handle", "screwdriver", "screwdriver", "screwdriver", "screwdriver", "handle", "screwdriver", "handle", "handle", "handle", "handle", "handle", "ruler"]}, {"id": "VD_video_1_10_0_3", "boxes": [[3532, 40, 3628, 118], [3, 16, 921, 387], [1962, 19, 2344, 382], [2172, 20, 3719, 382], [2805, 27, 3072, 333], [118, 27, 509, 386], [196, 94, 447, 335], [24, 14, 1853, 390], [1040, 24, 1433, 395], [1129, 92, 1372, 332], [943, 12, 1857, 391], [2039, 70, 2286, 290], [1902, 10, 2631, 377], [2797, 89, 2996, 300], [2746, 12, 3747, 378], [498, 104, 844, 446], [2795, 105, 2961, 296], [3400, 106, 3715, 417], [1845, 141, 2292, 435], [1198, 182, 1531, 413], [1468, 155, 1806, 436], [2440, 103, 2683, 417], [2792, 220, 3115, 460], [0, 199, 76, 437], [324, 179, 558, 406], [933, 202, 1029, 448], [2348, 99, 2678, 420], [1214, 158, 1803, 443], [3437, 31, 3718, 458]], "scores": [0.20124082267284393, 0.34967657923698425, 0.5101414918899536, 0.21324066817760468, 0.44689685106277466, 0.5181186199188232, 0.35462453961372375, 0.33214935660362244, 0.5338215231895447, 0.333118200302124, 0.33881404995918274, 0.3134152591228485, 0.3131914734840393, 0.22239325940608978, 0.3398140072822571, 0.4673309922218323, 0.23734073340892792, 0.4492887258529663, 0.4287789762020111, 0.23541347682476044, 0.3250073194503784, 0.2296696901321411, 0.38236263394355774, 0.30775004625320435, 0.4005674719810486, 0.4587398171424866, 0.412052184343338, 0.31756237149238586, 0.22987152636051178], "labels": ["animal", "car", "tire", "car", "tire", "tire", "rim", "car", "tire", "rim", "car", "rim", "car", "rim", "car", "animal", "rim", "wheel", "animal", "animal", "animal", "animal", "animal", "animal", "rim", "animal", "animal", "animal", "animal"]}, {"id": "VS_chart_2_4_1_1", "boxes": [[52, -8, 1748, 940]], "scores": [0.5943771600723267], "labels": ["graph"]}, {"id": "VS_chart_2_20_2_1", "boxes": [[3, 1, 731, 730], [236, 139, 528, 711], [12, 106, 48, 716], [29, 713, 726, 731]], "scores": [0.5249504446983337, 0.5160407423973083, 0.22812946140766144, 0.22057169675827026], "labels": ["graph", "pyramid", "graph", "graph"]}, {"id": "VS_table_1_9_1_0", "boxes": [[51, 48, 938, 83], [578, 265, 595, 288], [202, 159, 738, 548], [568, 343, 578, 364], [568, 420, 578, 441], [574, 496, 590, 519], [598, 496, 614, 519]], "scores": [0.36554694175720215, 0.21496111154556274, 0.2156088650226593, 0.2152412235736847, 0.22971633076667786, 0.296612948179245, 0.23035815358161926], "labels": ["text", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_3_2_2", "boxes": [[155, 40, 192, 102], [6, 4, 294, 453], [124, 129, 158, 181], [203, 128, 237, 180], [246, 129, 274, 181], [25, 129, 54, 182], [83, 129, 118, 180], [162, 130, 196, 180], [9, 29, 287, 430], [14, 199, 38, 241], [44, 199, 69, 242], [75, 199, 100, 241], [106, 199, 132, 240], [137, 199, 162, 241], [199, 199, 224, 241], [229, 199, 255, 241], [260, 199, 286, 241], [13, 254, 33, 288], [37, 254, 59, 288], [139, 255, 161, 288], [166, 255, 184, 288], [9, 117, 288, 422], [12, 299, 29, 325], [270, 299, 287, 326]], "scores": [0.23924598097801208, 0.36869123578071594, 0.2950306534767151, 0.2667487859725952, 0.34645533561706543, 0.38871830701828003, 0.33151569962501526, 0.32622411847114563, 0.425184428691864, 0.273041307926178, 0.20575374364852905, 0.23817847669124603, 0.2513320744037628, 0.26820284128189087, 0.26199138164520264, 0.20803511142730713, 0.21972747147083282, 0.22309410572052002, 0.2314242720603943, 0.20891375839710236, 0.24927937984466553, 0.4052715599536896, 0.230825737118721, 0.20033016800880432], "labels": ["number", "poster", "number", "number", "number", "number", "number", "number", "font", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "font", "number", "number"]}, {"id": "VD_illusion_2_17_1_1", "boxes": [[28, 38, 2956, 1410], [471, 185, 821, 676], [491, 201, 570, 590], [1179, 150, 1409, 960], [91, 605, 2920, 1384], [2190, 700, 2368, 1306], [22, -5, 3006, 1469]], "scores": [0.3326476216316223, 0.2592281401157379, 0.2259562611579895, 0.393204003572464, 0.3082590401172638, 0.35869520902633667, 0.3214534819126129], "labels": ["illustration", "doorway", "doorway", "businessman", "corridor", "businessman", "illustration"]}, {"id": "VD_math_1_13_0_1", "boxes": [[465, 9, 496, 47], [0, 0, 549, 444], [56, 45, 508, 399]], "scores": [0.3073160648345947, 0.24848195910453796, 0.562919557094574], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_6_0_0", "boxes": [[7, 7, 641, 654], [51, 16, 632, 642], [244, 439, 378, 500], [242, 421, 382, 550]], "scores": [0.4608056843280792, 0.2465769201517105, 0.20531782507896423, 0.3670261800289154], "labels": ["circle", "man", "bow tie", "bow tie"]}, {"id": "VD_video_2_10_1_0", "boxes": [[1, 34, 278, 336], [1035, 21, 1427, 381], [1967, 27, 2368, 403], [2907, 30, 3306, 396], [1111, 73, 1355, 299], [2054, 96, 2297, 341], [2989, 94, 3242, 338], [3, 114, 164, 296], [0, 102, 192, 304], [0, 13, 1799, 381], [611, 107, 930, 417], [1977, 10, 3747, 399], [3119, 189, 3350, 410]], "scores": [0.47745242714881897, 0.5175603628158569, 0.5253516435623169, 0.5055396556854248, 0.3344748318195343, 0.3065383732318878, 0.3077198266983032, 0.23649291694164276, 0.2014138549566269, 0.3853908181190491, 0.4246361553668976, 0.35676369071006775, 0.2468162178993225], "labels": ["tire", "tire", "tire", "tire", "rim", "rim", "rim", "rim", "rim", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_1_2", "boxes": [[52, -8, 1748, 940]], "scores": [0.5943771600723267], "labels": ["graph"]}, {"id": "VS_chart_0_21_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_9_1_1", "boxes": [[51, 48, 938, 83], [578, 265, 595, 288], [202, 159, 738, 548], [568, 343, 578, 364], [568, 420, 578, 441], [574, 496, 590, 519], [598, 496, 614, 519]], "scores": [0.36554694175720215, 0.21496111154556274, 0.2156088650226593, 0.2152412235736847, 0.22971633076667786, 0.296612948179245, 0.23035815358161926], "labels": ["text", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_4_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_18_0_0", "boxes": [[59, 49, 2978, 1415], [486, 198, 876, 720], [1442, 201, 1725, 1110], [2205, 408, 2486, 1318], [15, -8, 3009, 1473]], "scores": [0.31113487482070923, 0.4319002330303192, 0.24769631028175354, 0.2586613893508911, 0.349172443151474], "labels": ["elevator", "doorway", "person", "person", "illustration"]}, {"id": "VD_math_1_13_0_2", "boxes": [[465, 9, 496, 47], [0, 0, 549, 444], [56, 45, 508, 399]], "scores": [0.3073160648345947, 0.24848195910453796, 0.562919557094574], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_6_0_1", "boxes": [[7, 7, 641, 654], [51, 16, 632, 642], [244, 439, 378, 500], [242, 421, 382, 550]], "scores": [0.4608056843280792, 0.2465769201517105, 0.20531782507896423, 0.3670261800289154], "labels": ["circle", "man", "bow tie", "bow tie"]}, {"id": "VD_video_2_10_1_1", "boxes": [[1, 34, 278, 336], [1035, 21, 1427, 381], [1967, 27, 2368, 403], [2907, 30, 3306, 396], [1111, 73, 1355, 299], [2054, 96, 2297, 341], [2989, 94, 3242, 338], [3, 114, 164, 296], [0, 102, 192, 304], [0, 13, 1799, 381], [611, 107, 930, 417], [1977, 10, 3747, 399], [3119, 189, 3350, 410]], "scores": [0.47745242714881897, 0.5175603628158569, 0.5253516435623169, 0.5055396556854248, 0.3344748318195343, 0.3065383732318878, 0.3077198266983032, 0.23649291694164276, 0.2014138549566269, 0.3853908181190491, 0.4246361553668976, 0.35676369071006775, 0.2468162178993225], "labels": ["tire", "tire", "tire", "tire", "rim", "rim", "rim", "rim", "rim", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_1_3", "boxes": [[52, -8, 1748, 940]], "scores": [0.5943771600723267], "labels": ["graph"]}, {"id": "VS_chart_0_21_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_9_1_2", "boxes": [[51, 48, 938, 83], [578, 265, 595, 288], [202, 159, 738, 548], [568, 343, 578, 364], [568, 420, 578, 441], [574, 496, 590, 519], [598, 496, 614, 519]], "scores": [0.36554694175720215, 0.21496111154556274, 0.2156088650226593, 0.2152412235736847, 0.22971633076667786, 0.296612948179245, 0.23035815358161926], "labels": ["text", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_4_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_18_0_1", "boxes": [[59, 49, 2978, 1415], [486, 198, 876, 720], [1442, 201, 1725, 1110], [2205, 408, 2486, 1318], [15, -8, 3009, 1473]], "scores": [0.31113487482070923, 0.4319002330303192, 0.24769631028175354, 0.2586613893508911, 0.349172443151474], "labels": ["elevator", "doorway", "person", "person", "illustration"]}, {"id": "VD_math_2_13_1_0", "boxes": [[430, 10, 458, 45], [0, 0, 548, 418], [56, 38, 471, 365]], "scores": [0.37696799635887146, 0.23955950140953064, 0.5571295022964478], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_6_1_0", "boxes": [[155, 100, 422, 446], [14, 17, 632, 642], [230, 317, 372, 361], [68, 22, 568, 632], [247, 446, 382, 498], [245, 446, 383, 550]], "scores": [0.20164717733860016, 0.43846386671066284, 0.23915359377861023, 0.47890782356262207, 0.338140606880188, 0.6623261570930481], "labels": ["face", "image", "smile", "image", "bow tie", "bow tie"]}, {"id": "VD_video_2_10_1_2", "boxes": [[1, 34, 278, 336], [1035, 21, 1427, 381], [1967, 27, 2368, 403], [2907, 30, 3306, 396], [1111, 73, 1355, 299], [2054, 96, 2297, 341], [2989, 94, 3242, 338], [3, 114, 164, 296], [0, 102, 192, 304], [0, 13, 1799, 381], [611, 107, 930, 417], [1977, 10, 3747, 399], [3119, 189, 3350, 410]], "scores": [0.47745242714881897, 0.5175603628158569, 0.5253516435623169, 0.5055396556854248, 0.3344748318195343, 0.3065383732318878, 0.3077198266983032, 0.23649291694164276, 0.2014138549566269, 0.3853908181190491, 0.4246361553668976, 0.35676369071006775, 0.2468162178993225], "labels": ["tire", "tire", "tire", "tire", "rim", "rim", "rim", "rim", "rim", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_2_0", "boxes": [[8, -1, 1537, 1055], [225, 63, 1399, 868], [78, 43, 1538, 1022]], "scores": [0.5522618293762207, 0.25351980328559875, 0.5220376253128052], "labels": ["graph", "graph", "graph"]}, {"id": "VS_chart_1_21_1_0", "boxes": [[5, 4, 984, 1015], [264, 113, 866, 853], [24, 101, 945, 879], [259, 887, 268, 899]], "scores": [0.29821157455444336, 0.3778286576271057, 0.3345095217227936, 0.2195812463760376], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_1_9_1_3", "boxes": [[51, 48, 938, 83], [578, 265, 595, 288], [202, 159, 738, 548], [568, 343, 578, 364], [568, 420, 578, 441], [574, 496, 590, 519], [598, 496, 614, 519]], "scores": [0.36554694175720215, 0.21496111154556274, 0.2156088650226593, 0.2152412235736847, 0.22971633076667786, 0.296612948179245, 0.23035815358161926], "labels": ["text", "number", "text", "number", "number", "number", "number"]}, {"id": "VS_ocr_1_4_1_0", "boxes": [[442, 91, 470, 143]], "scores": [0.2634185254573822], "labels": ["number"]}, {"id": "VD_illusion_2_18_1_0", "boxes": [[55, 48, 2978, 1420], [485, 199, 876, 720], [1444, 201, 1726, 1110], [2120, 76, 2548, 1453], [15, -13, 3011, 1462]], "scores": [0.3264099061489105, 0.4209901988506317, 0.26975491642951965, 0.3186202943325043, 0.3180350959300995], "labels": ["maze", "doorway", "man", "man", "illustration"]}, {"id": "VD_math_2_13_1_1", "boxes": [[430, 10, 458, 45], [0, 0, 548, 418], [56, 38, 471, 365]], "scores": [0.37696799635887146, 0.23955950140953064, 0.5571295022964478], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_6_1_1", "boxes": [[155, 100, 422, 446], [14, 17, 632, 642], [230, 317, 372, 361], [68, 22, 568, 632], [247, 446, 382, 498], [245, 446, 383, 550]], "scores": [0.20164717733860016, 0.43846386671066284, 0.23915359377861023, 0.47890782356262207, 0.338140606880188, 0.6623261570930481], "labels": ["face", "image", "smile", "image", "bow tie", "bow tie"]}, {"id": "VD_video_2_10_1_3", "boxes": [[1, 34, 278, 336], [1035, 21, 1427, 381], [1967, 27, 2368, 403], [2907, 30, 3306, 396], [1111, 73, 1355, 299], [2054, 96, 2297, 341], [2989, 94, 3242, 338], [3, 114, 164, 296], [0, 102, 192, 304], [0, 13, 1799, 381], [611, 107, 930, 417], [1977, 10, 3747, 399], [3119, 189, 3350, 410]], "scores": [0.47745242714881897, 0.5175603628158569, 0.5253516435623169, 0.5055396556854248, 0.3344748318195343, 0.3065383732318878, 0.3077198266983032, 0.23649291694164276, 0.2014138549566269, 0.3853908181190491, 0.4246361553668976, 0.35676369071006775, 0.2468162178993225], "labels": ["tire", "tire", "tire", "tire", "rim", "rim", "rim", "rim", "rim", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_2_1", "boxes": [[8, -1, 1537, 1055], [225, 63, 1399, 868], [78, 43, 1538, 1022]], "scores": [0.5522618293762207, 0.25351980328559875, 0.5220376253128052], "labels": ["graph", "graph", "graph"]}, {"id": "VS_chart_1_21_1_1", "boxes": [[5, 4, 984, 1015], [264, 113, 866, 853], [24, 101, 945, 879], [259, 887, 268, 899]], "scores": [0.29821157455444336, 0.3778286576271057, 0.3345095217227936, 0.2195812463760376], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_2_9_2_0", "boxes": [[588, 243, 603, 267], [578, 320, 588, 342], [618, 320, 629, 342], [616, 397, 632, 420], [584, 474, 601, 497], [609, 474, 624, 497]], "scores": [0.2284533977508545, 0.2521495819091797, 0.2021327018737793, 0.2055538445711136, 0.3507993221282959, 0.25870808959007263], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_1_4_1_1", "boxes": [[442, 91, 470, 143]], "scores": [0.2634185254573822], "labels": ["number"]}, {"id": "VD_illusion_2_18_1_1", "boxes": [[55, 48, 2978, 1420], [485, 199, 876, 720], [1444, 201, 1726, 1110], [2120, 76, 2548, 1453], [15, -13, 3011, 1462]], "scores": [0.3264099061489105, 0.4209901988506317, 0.26975491642951965, 0.3186202943325043, 0.3180350959300995], "labels": ["maze", "doorway", "man", "man", "illustration"]}, {"id": "VD_math_2_13_1_2", "boxes": [[430, 10, 458, 45], [0, 0, 548, 418], [56, 38, 471, 365]], "scores": [0.37696799635887146, 0.23955950140953064, 0.5571295022964478], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_7_0_0", "boxes": [[130, 34, 282, 373], [140, 43, 270, 368], [227, 187, 274, 235]], "scores": [0.5716171264648438, 0.6291016936302185, 0.6950126886367798], "labels": ["icon", "basketball player", "basketball"]}, {"id": "VD_video_2_10_2_0", "boxes": [[4, 33, 281, 347], [1964, 21, 2357, 387], [2110, 25, 3770, 400], [3, 88, 218, 316], [1045, 27, 1437, 401], [1132, 93, 1377, 337], [953, 16, 1854, 401], [2045, 72, 2294, 299], [2912, 28, 3315, 396], [2991, 97, 3245, 339], [6, 111, 176, 304], [16, 16, 958, 358], [8, 16, 1797, 401], [617, 111, 929, 413], [-81, 31, 3557, 411], [3122, 191, 3352, 409]], "scores": [0.4612576961517334, 0.4908100366592407, 0.32097575068473816, 0.2007770538330078, 0.5050612688064575, 0.2915138900279999, 0.22787214815616608, 0.2805153429508209, 0.510693371295929, 0.30648666620254517, 0.22778774797916412, 0.23144026100635529, 0.34086528420448303, 0.29100051522254944, 0.2386741042137146, 0.22275932133197784], "labels": ["tire", "tire", "car", "rim", "tire", "rim", "car", "rim", "tire", "rim", "rim", "car", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_2_2", "boxes": [[8, -1, 1537, 1055], [225, 63, 1399, 868], [78, 43, 1538, 1022]], "scores": [0.5522618293762207, 0.25351980328559875, 0.5220376253128052], "labels": ["graph", "graph", "graph"]}, {"id": "VS_chart_2_21_2_0", "boxes": [[5, 4, 984, 1015], [264, 114, 866, 853], [23, 101, 946, 878], [259, 887, 268, 899]], "scores": [0.3042530119419098, 0.38350552320480347, 0.2817378044128418, 0.21902990341186523], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_2_9_2_1", "boxes": [[588, 243, 603, 267], [578, 320, 588, 342], [618, 320, 629, 342], [616, 397, 632, 420], [584, 474, 601, 497], [609, 474, 624, 497]], "scores": [0.2284533977508545, 0.2521495819091797, 0.2021327018737793, 0.2055538445711136, 0.3507993221282959, 0.25870808959007263], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_4_2_0", "boxes": [[483, 98, 513, 154], [545, 217, 574, 258]], "scores": [0.241623654961586, 0.2381325513124466], "labels": ["number", "number"]}, {"id": "VD_illusion_1_19_0_0", "boxes": [[41, 50, 473, 494], [558, 48, 994, 493], [124, 132, 396, 410], [638, 132, 909, 410]], "scores": [0.307871550321579, 0.35173332691192627, 0.5509886741638184, 0.5541269779205322], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_1_14_0_0", "boxes": [[267, 9, 299, 48], [0, 1, 623, 506], [76, 74, 552, 431]], "scores": [0.26491066813468933, 0.289143830537796, 0.7028480172157288], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_7_0_1", "boxes": [[130, 34, 282, 373], [140, 43, 270, 368], [227, 187, 274, 235]], "scores": [0.5716171264648438, 0.6291016936302185, 0.6950126886367798], "labels": ["icon", "basketball player", "basketball"]}, {"id": "VD_video_2_10_2_1", "boxes": [[4, 33, 281, 347], [1964, 21, 2357, 387], [2110, 25, 3770, 400], [3, 88, 218, 316], [1045, 27, 1437, 401], [1132, 93, 1377, 337], [953, 16, 1854, 401], [2045, 72, 2294, 299], [2912, 28, 3315, 396], [2991, 97, 3245, 339], [6, 111, 176, 304], [16, 16, 958, 358], [8, 16, 1797, 401], [617, 111, 929, 413], [-81, 31, 3557, 411], [3122, 191, 3352, 409]], "scores": [0.4612576961517334, 0.4908100366592407, 0.32097575068473816, 0.2007770538330078, 0.5050612688064575, 0.2915138900279999, 0.22787214815616608, 0.2805153429508209, 0.510693371295929, 0.30648666620254517, 0.22778774797916412, 0.23144026100635529, 0.34086528420448303, 0.29100051522254944, 0.2386741042137146, 0.22275932133197784], "labels": ["tire", "tire", "car", "rim", "tire", "rim", "car", "rim", "tire", "rim", "rim", "car", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_2_4_2_3", "boxes": [[8, -1, 1537, 1055], [225, 63, 1399, 868], [78, 43, 1538, 1022]], "scores": [0.5522618293762207, 0.25351980328559875, 0.5220376253128052], "labels": ["graph", "graph", "graph"]}, {"id": "VS_chart_2_21_2_1", "boxes": [[5, 4, 984, 1015], [264, 114, 866, 853], [23, 101, 946, 878], [259, 887, 268, 899]], "scores": [0.3042530119419098, 0.38350552320480347, 0.2817378044128418, 0.21902990341186523], "labels": ["graph", "graph", "graph", "number"]}, {"id": "VS_table_2_9_2_2", "boxes": [[588, 243, 603, 267], [578, 320, 588, 342], [618, 320, 629, 342], [616, 397, 632, 420], [584, 474, 601, 497], [609, 474, 624, 497]], "scores": [0.2284533977508545, 0.2521495819091797, 0.2021327018737793, 0.2055538445711136, 0.3507993221282959, 0.25870808959007263], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_4_2_1", "boxes": [[483, 98, 513, 154], [545, 217, 574, 258]], "scores": [0.241623654961586, 0.2381325513124466], "labels": ["number", "number"]}, {"id": "VD_illusion_1_19_0_1", "boxes": [[41, 50, 473, 494], [558, 48, 994, 493], [124, 132, 396, 410], [638, 132, 909, 410]], "scores": [0.307871550321579, 0.35173332691192627, 0.5509886741638184, 0.5541269779205322], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_1_14_0_1", "boxes": [[267, 9, 299, 48], [0, 1, 623, 506], [76, 74, 552, 431]], "scores": [0.26491066813468933, 0.289143830537796, 0.7028480172157288], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_7_1_0", "boxes": [[0, 1, 262, 303], [7, 5, 258, 302], [208, 113, 262, 174]], "scores": [0.21377918124198914, 0.6690236330032349, 0.614916980266571], "labels": ["square", "basketball player", "basketball"]}, {"id": "VD_video_2_10_2_2", "boxes": [[4, 33, 281, 347], [1964, 21, 2357, 387], [2110, 25, 3770, 400], [3, 88, 218, 316], [1045, 27, 1437, 401], [1132, 93, 1377, 337], [953, 16, 1854, 401], [2045, 72, 2294, 299], [2912, 28, 3315, 396], [2991, 97, 3245, 339], [6, 111, 176, 304], [16, 16, 958, 358], [8, 16, 1797, 401], [617, 111, 929, 413], [-81, 31, 3557, 411], [3122, 191, 3352, 409]], "scores": [0.4612576961517334, 0.4908100366592407, 0.32097575068473816, 0.2007770538330078, 0.5050612688064575, 0.2915138900279999, 0.22787214815616608, 0.2805153429508209, 0.510693371295929, 0.30648666620254517, 0.22778774797916412, 0.23144026100635529, 0.34086528420448303, 0.29100051522254944, 0.2386741042137146, 0.22275932133197784], "labels": ["tire", "tire", "car", "rim", "tire", "rim", "car", "rim", "tire", "rim", "rim", "car", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_0_5_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_0_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_9_2_3", "boxes": [[588, 243, 603, 267], [578, 320, 588, 342], [618, 320, 629, 342], [616, 397, 632, 420], [584, 474, 601, 497], [609, 474, 624, 497]], "scores": [0.2284533977508545, 0.2521495819091797, 0.2021327018737793, 0.2055538445711136, 0.3507993221282959, 0.25870808959007263], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_5_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_19_0_2", "boxes": [[41, 50, 473, 494], [558, 48, 994, 493], [124, 132, 396, 410], [638, 132, 909, 410]], "scores": [0.307871550321579, 0.35173332691192627, 0.5509886741638184, 0.5541269779205322], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_1_14_0_2", "boxes": [[267, 9, 299, 48], [0, 1, 623, 506], [76, 74, 552, 431]], "scores": [0.26491066813468933, 0.289143830537796, 0.7028480172157288], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_7_1_1", "boxes": [[0, 1, 262, 303], [7, 5, 258, 302], [208, 113, 262, 174]], "scores": [0.21377918124198914, 0.6690236330032349, 0.614916980266571], "labels": ["square", "basketball player", "basketball"]}, {"id": "VD_video_2_10_2_3", "boxes": [[4, 33, 281, 347], [1964, 21, 2357, 387], [2110, 25, 3770, 400], [3, 88, 218, 316], [1045, 27, 1437, 401], [1132, 93, 1377, 337], [953, 16, 1854, 401], [2045, 72, 2294, 299], [2912, 28, 3315, 396], [2991, 97, 3245, 339], [6, 111, 176, 304], [16, 16, 958, 358], [8, 16, 1797, 401], [617, 111, 929, 413], [-81, 31, 3557, 411], [3122, 191, 3352, 409]], "scores": [0.4612576961517334, 0.4908100366592407, 0.32097575068473816, 0.2007770538330078, 0.5050612688064575, 0.2915138900279999, 0.22787214815616608, 0.2805153429508209, 0.510693371295929, 0.30648666620254517, 0.22778774797916412, 0.23144026100635529, 0.34086528420448303, 0.29100051522254944, 0.2386741042137146, 0.22275932133197784], "labels": ["tire", "tire", "car", "rim", "tire", "rim", "car", "rim", "tire", "rim", "rim", "car", "car", "wheel", "car", "rim"]}, {"id": "VS_chart_0_5_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_0_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_10_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_5_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_19_0_3", "boxes": [[41, 50, 473, 494], [558, 48, 994, 493], [124, 132, 396, 410], [638, 132, 909, 410]], "scores": [0.307871550321579, 0.35173332691192627, 0.5509886741638184, 0.5541269779205322], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_2_14_1_0", "boxes": [[309, 30, 341, 69], [1, 3, 649, 538], [92, 82, 571, 448]], "scores": [0.2597333788871765, 0.27022168040275574, 0.7447311282157898], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_8_0_0", "boxes": [[74, 3, 91, 15], [33, 11, 44, 20], [0, 0, 109, 35], [59, 91, 80, 112], [96, 94, 118, 115], [0, 75, 71, 215], [14, 107, 29, 120], [42, 117, 57, 138], [0, 131, 14, 152], [20, 133, 35, 150], [102, 134, 120, 153], [151, 128, 167, 145], [82, 95, 227, 236], [208, 141, 218, 159], [171, 145, 182, 156], [0, 158, 11, 175], [120, 155, 137, 174], [180, 154, 200, 176], [40, 160, 58, 173], [25, 166, 41, 186], [119, 183, 142, 204], [102, 198, 117, 214], [237, 202, 260, 223], [160, 199, 182, 223], [0, 223, 124, 356], [39, 227, 58, 245], [164, 239, 175, 256], [33, 246, 52, 263], [185, 247, 207, 269], [10, 259, 24, 279], [71, 260, 92, 282], [159, 263, 169, 284], [195, 271, 213, 288], [56, 273, 70, 291], [228, 285, 248, 307], [121, 296, 131, 316], [126, 228, 273, 372], [176, 307, 191, 325], [83, 310, 104, 329], [150, 312, 172, 332], [175, 334, 187, 346], [229, 347, 242, 360], [36, 360, 57, 381], [206, 360, 223, 374], [43, 376, 188, 448], [94, 405, 112, 421], [161, 414, 173, 434], [68, 423, 88, 445], [130, 424, 151, 444]], "scores": [0.30274131894111633, 0.2947244346141815, 0.20686329901218414, 0.5796380043029785, 0.5764985084533691, 0.23883534967899323, 0.2994225323200226, 0.2154645323753357, 0.3009876012802124, 0.2754676640033722, 0.4358275532722473, 0.3509672284126282, 0.44197511672973633, 0.24638858437538147, 0.3605147898197174, 0.25960758328437805, 0.42652633786201477, 0.5648561716079712, 0.2993379235267639, 0.35493046045303345, 0.5765834450721741, 0.3200221359729767, 0.6100279688835144, 0.5044611692428589, 0.4476201832294464, 0.2731228470802307, 0.2649543285369873, 0.46583715081214905, 0.5196912884712219, 0.2746894657611847, 0.48673415184020996, 0.24874182045459747, 0.4023233950138092, 0.25518062710762024, 0.506584107875824, 0.35258111357688904, 0.43545323610305786, 0.30777406692504883, 0.5036965608596802, 0.5243728160858154, 0.20767882466316223, 0.22065559029579163, 0.5883576273918152, 0.2767038941383362, 0.3548068106174469, 0.5240054130554199, 0.3952828049659729, 0.46102455258369446, 0.47366634011268616], "labels": ["chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip"]}, {"id": "VD_video_1_11_0_0", "boxes": [[168, 22, 539, 443], [1278, 7, 1482, 426], [1543, 7, 1896, 628], [175, 28, 736, 628], [2279, 224, 2395, 556], [-71, 77, 3559, 629]], "scores": [0.26641976833343506, 0.29470595717430115, 0.2444974035024643, 0.23732727766036987, 0.2677874267101288, 0.20466047525405884], "labels": ["woman", "woman", "woman", "woman", "woman", "image"]}, {"id": "VS_chart_0_5_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_0_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_0_10_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_5_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_19_1_0", "boxes": [[564, 73, 998, 516], [48, 77, 478, 519], [643, 157, 914, 437], [131, 158, 402, 435]], "scores": [0.3622828722000122, 0.3208995759487152, 0.5669897198677063, 0.5629739165306091], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_2_14_1_1", "boxes": [[309, 30, 341, 69], [1, 3, 649, 538], [92, 82, 571, 448]], "scores": [0.2597333788871765, 0.27022168040275574, 0.7447311282157898], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_8_0_1", "boxes": [[74, 3, 91, 15], [33, 11, 44, 20], [0, 0, 109, 35], [59, 91, 80, 112], [96, 94, 118, 115], [0, 75, 71, 215], [14, 107, 29, 120], [42, 117, 57, 138], [0, 131, 14, 152], [20, 133, 35, 150], [102, 134, 120, 153], [151, 128, 167, 145], [82, 95, 227, 236], [208, 141, 218, 159], [171, 145, 182, 156], [0, 158, 11, 175], [120, 155, 137, 174], [180, 154, 200, 176], [40, 160, 58, 173], [25, 166, 41, 186], [119, 183, 142, 204], [102, 198, 117, 214], [237, 202, 260, 223], [160, 199, 182, 223], [0, 223, 124, 356], [39, 227, 58, 245], [164, 239, 175, 256], [33, 246, 52, 263], [185, 247, 207, 269], [10, 259, 24, 279], [71, 260, 92, 282], [159, 263, 169, 284], [195, 271, 213, 288], [56, 273, 70, 291], [228, 285, 248, 307], [121, 296, 131, 316], [126, 228, 273, 372], [176, 307, 191, 325], [83, 310, 104, 329], [150, 312, 172, 332], [175, 334, 187, 346], [229, 347, 242, 360], [36, 360, 57, 381], [206, 360, 223, 374], [43, 376, 188, 448], [94, 405, 112, 421], [161, 414, 173, 434], [68, 423, 88, 445], [130, 424, 151, 444]], "scores": [0.30274131894111633, 0.2947244346141815, 0.20686329901218414, 0.5796380043029785, 0.5764985084533691, 0.23883534967899323, 0.2994225323200226, 0.2154645323753357, 0.3009876012802124, 0.2754676640033722, 0.4358275532722473, 0.3509672284126282, 0.44197511672973633, 0.24638858437538147, 0.3605147898197174, 0.25960758328437805, 0.42652633786201477, 0.5648561716079712, 0.2993379235267639, 0.35493046045303345, 0.5765834450721741, 0.3200221359729767, 0.6100279688835144, 0.5044611692428589, 0.4476201832294464, 0.2731228470802307, 0.2649543285369873, 0.46583715081214905, 0.5196912884712219, 0.2746894657611847, 0.48673415184020996, 0.24874182045459747, 0.4023233950138092, 0.25518062710762024, 0.506584107875824, 0.35258111357688904, 0.43545323610305786, 0.30777406692504883, 0.5036965608596802, 0.5243728160858154, 0.20767882466316223, 0.22065559029579163, 0.5883576273918152, 0.2767038941383362, 0.3548068106174469, 0.5240054130554199, 0.3952828049659729, 0.46102455258369446, 0.47366634011268616], "labels": ["chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip"]}, {"id": "VD_video_1_11_0_1", "boxes": [[168, 22, 539, 443], [1278, 7, 1482, 426], [1543, 7, 1896, 628], [175, 28, 736, 628], [2279, 224, 2395, 556], [-71, 77, 3559, 629]], "scores": [0.26641976833343506, 0.29470595717430115, 0.2444974035024643, 0.23732727766036987, 0.2677874267101288, 0.20466047525405884], "labels": ["woman", "woman", "woman", "woman", "woman", "image"]}, {"id": "VS_chart_0_5_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_0_1_0", "boxes": [[232, 251, 242, 265]], "scores": [0.21068082749843597], "labels": ["number"]}, {"id": "VS_table_0_10_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_5_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_19_1_1", "boxes": [[564, 73, 998, 516], [48, 77, 478, 519], [643, 157, 914, 437], [131, 158, 402, 435]], "scores": [0.3622828722000122, 0.3208995759487152, 0.5669897198677063, 0.5629739165306091], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_2_14_1_2", "boxes": [[309, 30, 341, 69], [1, 3, 649, 538], [92, 82, 571, 448]], "scores": [0.2597333788871765, 0.27022168040275574, 0.7447311282157898], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_8_1_0", "boxes": [[75, 1, 91, 13], [34, 10, 44, 19], [0, 0, 108, 33], [60, 89, 81, 110], [98, 92, 119, 114], [133, 99, 149, 108], [13, 107, 29, 120], [40, 115, 57, 136], [0, 73, 71, 214], [82, 94, 229, 235], [152, 127, 168, 144], [0, 132, 14, 151], [21, 133, 35, 151], [103, 134, 121, 152], [211, 141, 221, 159], [172, 144, 184, 155], [121, 154, 139, 174], [182, 153, 202, 176], [0, 158, 11, 175], [40, 159, 58, 173], [24, 164, 41, 186], [120, 183, 143, 204], [104, 198, 119, 213], [161, 199, 184, 224], [240, 202, 262, 223], [39, 227, 58, 245], [126, 226, 275, 372], [33, 247, 53, 264], [10, 260, 24, 280], [72, 260, 93, 281], [0, 222, 125, 357], [55, 273, 71, 292], [231, 286, 250, 307], [123, 296, 133, 316], [177, 307, 192, 325], [84, 311, 105, 330], [151, 313, 173, 334], [176, 335, 188, 347], [231, 349, 243, 362], [207, 359, 226, 374], [36, 361, 58, 382], [43, 378, 189, 450], [95, 406, 114, 423], [163, 416, 175, 436], [69, 425, 89, 447], [131, 425, 153, 446]], "scores": [0.2817738652229309, 0.23719966411590576, 0.22812113165855408, 0.5986768007278442, 0.5747577548027039, 0.22768732905387878, 0.3220905363559723, 0.24645845592021942, 0.25123122334480286, 0.44554609060287476, 0.418779194355011, 0.309284508228302, 0.2661590874195099, 0.4329184889793396, 0.29959046840667725, 0.3524446189403534, 0.42813271284103394, 0.5535919666290283, 0.23699165880680084, 0.2963247001171112, 0.3547030985355377, 0.5493661761283875, 0.2672503888607025, 0.48888519406318665, 0.5905020833015442, 0.25491267442703247, 0.45233476161956787, 0.4292421340942383, 0.27865228056907654, 0.4556725025177002, 0.45649808645248413, 0.2545941174030304, 0.47228312492370605, 0.31894195079803467, 0.28844714164733887, 0.4705638289451599, 0.4701087176799774, 0.22467730939388275, 0.24265596270561218, 0.26487967371940613, 0.575644850730896, 0.38065415620803833, 0.5223137140274048, 0.4089191257953644, 0.44710397720336914, 0.4585355520248413], "labels": ["chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip"]}, {"id": "VD_video_1_11_0_2", "boxes": [[168, 22, 539, 443], [1278, 7, 1482, 426], [1543, 7, 1896, 628], [175, 28, 736, 628], [2279, 224, 2395, 556], [-71, 77, 3559, 629]], "scores": [0.26641976833343506, 0.29470595717430115, 0.2444974035024643, 0.23732727766036987, 0.2677874267101288, 0.20466047525405884], "labels": ["woman", "woman", "woman", "woman", "woman", "image"]}, {"id": "VS_chart_1_5_1_0", "boxes": [[7, 5, 1236, 903], [125, 151, 1099, 656], [277, 183, 1143, 535], [118, 563, 1105, 579], [107, 562, 1113, 579]], "scores": [0.3359640836715698, 0.4943896532058716, 0.2001909762620926, 0.2369922697544098, 0.2515229880809784], "labels": ["graph", "graph", "graph", "line", "line"]}, {"id": "VS_table_1_0_1_1", "boxes": [[232, 251, 242, 265]], "scores": [0.21068082749843597], "labels": ["number"]}, {"id": "VS_table_0_10_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_2_5_2_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_19_1_2", "boxes": [[564, 73, 998, 516], [48, 77, 478, 519], [643, 157, 914, 437], [131, 158, 402, 435]], "scores": [0.3622828722000122, 0.3208995759487152, 0.5669897198677063, 0.5629739165306091], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_1_15_0_0", "boxes": [[501, 25, 532, 63], [0, 0, 650, 489], [89, 55, 550, 419]], "scores": [0.2854059934616089, 0.21910914778709412, 0.6237020492553711], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_8_1_1", "boxes": [[75, 1, 91, 13], [34, 10, 44, 19], [0, 0, 108, 33], [60, 89, 81, 110], [98, 92, 119, 114], [133, 99, 149, 108], [13, 107, 29, 120], [40, 115, 57, 136], [0, 73, 71, 214], [82, 94, 229, 235], [152, 127, 168, 144], [0, 132, 14, 151], [21, 133, 35, 151], [103, 134, 121, 152], [211, 141, 221, 159], [172, 144, 184, 155], [121, 154, 139, 174], [182, 153, 202, 176], [0, 158, 11, 175], [40, 159, 58, 173], [24, 164, 41, 186], [120, 183, 143, 204], [104, 198, 119, 213], [161, 199, 184, 224], [240, 202, 262, 223], [39, 227, 58, 245], [126, 226, 275, 372], [33, 247, 53, 264], [10, 260, 24, 280], [72, 260, 93, 281], [0, 222, 125, 357], [55, 273, 71, 292], [231, 286, 250, 307], [123, 296, 133, 316], [177, 307, 192, 325], [84, 311, 105, 330], [151, 313, 173, 334], [176, 335, 188, 347], [231, 349, 243, 362], [207, 359, 226, 374], [36, 361, 58, 382], [43, 378, 189, 450], [95, 406, 114, 423], [163, 416, 175, 436], [69, 425, 89, 447], [131, 425, 153, 446]], "scores": [0.2817738652229309, 0.23719966411590576, 0.22812113165855408, 0.5986768007278442, 0.5747577548027039, 0.22768732905387878, 0.3220905363559723, 0.24645845592021942, 0.25123122334480286, 0.44554609060287476, 0.418779194355011, 0.309284508228302, 0.2661590874195099, 0.4329184889793396, 0.29959046840667725, 0.3524446189403534, 0.42813271284103394, 0.5535919666290283, 0.23699165880680084, 0.2963247001171112, 0.3547030985355377, 0.5493661761283875, 0.2672503888607025, 0.48888519406318665, 0.5905020833015442, 0.25491267442703247, 0.45233476161956787, 0.4292421340942383, 0.27865228056907654, 0.4556725025177002, 0.45649808645248413, 0.2545941174030304, 0.47228312492370605, 0.31894195079803467, 0.28844714164733887, 0.4705638289451599, 0.4701087176799774, 0.22467730939388275, 0.24265596270561218, 0.26487967371940613, 0.575644850730896, 0.38065415620803833, 0.5223137140274048, 0.4089191257953644, 0.44710397720336914, 0.4585355520248413], "labels": ["chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip", "biscuit", "chocolate chip", "chocolate chip", "chocolate chip", "chocolate chip"]}, {"id": "VD_video_1_11_0_3", "boxes": [[168, 22, 539, 443], [1278, 7, 1482, 426], [1543, 7, 1896, 628], [175, 28, 736, 628], [2279, 224, 2395, 556], [-71, 77, 3559, 629]], "scores": [0.26641976833343506, 0.29470595717430115, 0.2444974035024643, 0.23732727766036987, 0.2677874267101288, 0.20466047525405884], "labels": ["woman", "woman", "woman", "woman", "woman", "image"]}, {"id": "VS_chart_1_5_1_1", "boxes": [[7, 5, 1236, 903], [125, 151, 1099, 656], [277, 183, 1143, 535], [118, 563, 1105, 579], [107, 562, 1113, 579]], "scores": [0.3359640836715698, 0.4943896532058716, 0.2001909762620926, 0.2369922697544098, 0.2515229880809784], "labels": ["graph", "graph", "graph", "line", "line"]}, {"id": "VS_table_1_0_1_2", "boxes": [[232, 251, 242, 265]], "scores": [0.21068082749843597], "labels": ["number"]}, {"id": "VS_table_1_10_1_0", "boxes": [[275, 96, 898, 130], [124, 200, 1138, 590], [267, 221, 459, 565], [856, 412, 867, 436], [856, 477, 867, 499], [902, 476, 919, 500], [856, 541, 868, 564], [869, 540, 887, 564], [902, 540, 919, 564]], "scores": [0.3391054570674896, 0.2491690218448639, 0.2848809063434601, 0.2096831351518631, 0.23121348023414612, 0.21667537093162537, 0.22019384801387787, 0.22322295606136322, 0.21686537563800812], "labels": ["text", "text", "text", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_5_2_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_19_1_3", "boxes": [[564, 73, 998, 516], [48, 77, 478, 519], [643, 157, 914, 437], [131, 158, 402, 435]], "scores": [0.3622828722000122, 0.3208995759487152, 0.5669897198677063, 0.5629739165306091], "labels": ["shape", "shape", "circle", "circle"]}, {"id": "VD_math_1_15_0_1", "boxes": [[501, 25, 532, 63], [0, 0, 650, 489], [89, 55, 550, 419]], "scores": [0.2854059934616089, 0.21910914778709412, 0.6237020492553711], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_9_0_0", "boxes": [[22, 30, 208, 104], [10, 32, 353, 106], [191, 41, 331, 101], [10, 54, 135, 104], [170, 56, 246, 103], [236, 41, 333, 95], [137, 55, 240, 104], [287, 65, 388, 104], [12, 74, 109, 104], [129, 82, 158, 103], [235, 78, 288, 103], [254, 78, 288, 102], [290, 92, 314, 102], [213, 95, 239, 104], [375, 97, 392, 106], [7, 100, 390, 228], [11, 118, 129, 158], [132, 100, 385, 228], [34, 146, 59, 157], [113, 145, 141, 156], [133, 138, 162, 157], [11, 143, 200, 230], [133, 183, 206, 228], [270, 190, 385, 230], [190, 196, 256, 230], [268, 172, 386, 231], [26, 238, 110, 273], [106, 257, 145, 272], [149, 261, 169, 271]], "scores": [0.49325528740882874, 0.3503725826740265, 0.40898042917251587, 0.5088037848472595, 0.33792445063591003, 0.45529794692993164, 0.4056040942668915, 0.5503904223442078, 0.44547009468078613, 0.24607257544994354, 0.3656068444252014, 0.20246583223342896, 0.22483518719673157, 0.2830456495285034, 0.20963826775550842, 0.3677861988544464, 0.5539889335632324, 0.5589544177055359, 0.27972865104675293, 0.3862435221672058, 0.3406428396701813, 0.5753164887428284, 0.5409278273582458, 0.46206820011138916, 0.4833221435546875, 0.5262768864631653, 0.5684675574302673, 0.4488052427768707, 0.3035304844379425], "labels": ["dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur"]}, {"id": "VD_video_2_11_1_0", "boxes": [[3004, 19, 3378, 439], [2233, 1, 2424, 417], [2487, 6, 2833, 623], [1335, 224, 1446, 549], [2827, 3, 3805, 616], [3006, 60, 3589, 624], [15, 69, 3533, 623]], "scores": [0.23825080692768097, 0.24961839616298676, 0.24577069282531738, 0.2610574960708618, 0.21070365607738495, 0.2833718955516815, 0.22053691744804382], "labels": ["woman", "woman", "man", "woman", "image", "man", "image"]}, {"id": "VS_chart_1_5_1_2", "boxes": [[7, 5, 1236, 903], [125, 151, 1099, 656], [277, 183, 1143, 535], [118, 563, 1105, 579], [107, 562, 1113, 579]], "scores": [0.3359640836715698, 0.4943896532058716, 0.2001909762620926, 0.2369922697544098, 0.2515229880809784], "labels": ["graph", "graph", "graph", "line", "line"]}, {"id": "VS_table_2_0_2_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_10_1_1", "boxes": [[275, 96, 898, 130], [124, 200, 1138, 590], [267, 221, 459, 565], [856, 412, 867, 436], [856, 477, 867, 499], [902, 476, 919, 500], [856, 541, 868, 564], [869, 540, 887, 564], [902, 540, 919, 564]], "scores": [0.3391054570674896, 0.2491690218448639, 0.2848809063434601, 0.2096831351518631, 0.23121348023414612, 0.21667537093162537, 0.22019384801387787, 0.22322295606136322, 0.21686537563800812], "labels": ["text", "text", "text", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_6_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_20_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_15_0_2", "boxes": [[501, 25, 532, 63], [0, 0, 650, 489], [89, 55, 550, 419]], "scores": [0.2854059934616089, 0.21910914778709412, 0.6237020492553711], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_9_0_1", "boxes": [[22, 30, 208, 104], [10, 32, 353, 106], [191, 41, 331, 101], [10, 54, 135, 104], [170, 56, 246, 103], [236, 41, 333, 95], [137, 55, 240, 104], [287, 65, 388, 104], [12, 74, 109, 104], [129, 82, 158, 103], [235, 78, 288, 103], [254, 78, 288, 102], [290, 92, 314, 102], [213, 95, 239, 104], [375, 97, 392, 106], [7, 100, 390, 228], [11, 118, 129, 158], [132, 100, 385, 228], [34, 146, 59, 157], [113, 145, 141, 156], [133, 138, 162, 157], [11, 143, 200, 230], [133, 183, 206, 228], [270, 190, 385, 230], [190, 196, 256, 230], [268, 172, 386, 231], [26, 238, 110, 273], [106, 257, 145, 272], [149, 261, 169, 271]], "scores": [0.49325528740882874, 0.3503725826740265, 0.40898042917251587, 0.5088037848472595, 0.33792445063591003, 0.45529794692993164, 0.4056040942668915, 0.5503904223442078, 0.44547009468078613, 0.24607257544994354, 0.3656068444252014, 0.20246583223342896, 0.22483518719673157, 0.2830456495285034, 0.20963826775550842, 0.3677861988544464, 0.5539889335632324, 0.5589544177055359, 0.27972865104675293, 0.3862435221672058, 0.3406428396701813, 0.5753164887428284, 0.5409278273582458, 0.46206820011138916, 0.4833221435546875, 0.5262768864631653, 0.5684675574302673, 0.4488052427768707, 0.3035304844379425], "labels": ["dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur"]}, {"id": "VD_video_2_11_1_1", "boxes": [[3004, 19, 3378, 439], [2233, 1, 2424, 417], [2487, 6, 2833, 623], [1335, 224, 1446, 549], [2827, 3, 3805, 616], [3006, 60, 3589, 624], [15, 69, 3533, 623]], "scores": [0.23825080692768097, 0.24961839616298676, 0.24577069282531738, 0.2610574960708618, 0.21070365607738495, 0.2833718955516815, 0.22053691744804382], "labels": ["woman", "woman", "man", "woman", "image", "man", "image"]}, {"id": "VS_chart_1_5_1_3", "boxes": [[7, 5, 1236, 903], [125, 151, 1099, 656], [277, 183, 1143, 535], [118, 563, 1105, 579], [107, 562, 1113, 579]], "scores": [0.3359640836715698, 0.4943896532058716, 0.2001909762620926, 0.2369922697544098, 0.2515229880809784], "labels": ["graph", "graph", "graph", "line", "line"]}, {"id": "VS_table_2_0_2_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_10_1_2", "boxes": [[275, 96, 898, 130], [124, 200, 1138, 590], [267, 221, 459, 565], [856, 412, 867, 436], [856, 477, 867, 499], [902, 476, 919, 500], [856, 541, 868, 564], [869, 540, 887, 564], [902, 540, 919, 564]], "scores": [0.3391054570674896, 0.2491690218448639, 0.2848809063434601, 0.2096831351518631, 0.23121348023414612, 0.21667537093162537, 0.22019384801387787, 0.22322295606136322, 0.21686537563800812], "labels": ["text", "text", "text", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_6_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_20_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_15_1_0", "boxes": [[465, 37, 494, 72], [2, 2, 625, 449], [90, 57, 514, 397]], "scores": [0.35090410709381104, 0.23383286595344543, 0.6414721012115479], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_9_1_0", "boxes": [[20, 30, 211, 104], [9, 32, 345, 106], [183, 40, 332, 101], [8, 54, 133, 104], [236, 40, 333, 92], [160, 56, 248, 103], [108, 55, 222, 104], [288, 66, 394, 104], [231, 78, 291, 103], [10, 74, 106, 104], [254, 78, 290, 102], [127, 81, 158, 102], [172, 89, 201, 105], [212, 95, 238, 104], [293, 92, 316, 104], [379, 98, 394, 107], [8, 118, 129, 158], [32, 147, 57, 158], [112, 146, 139, 157], [134, 139, 161, 158], [127, 101, 388, 230], [6, 101, 390, 231], [8, 144, 200, 232], [132, 184, 207, 230], [277, 191, 388, 232], [189, 199, 257, 231], [266, 176, 387, 232], [25, 239, 109, 275], [103, 260, 145, 275], [147, 263, 168, 274], [245, 245, 276, 281], [174, 267, 188, 274]], "scores": [0.5466253161430359, 0.36182454228401184, 0.48941245675086975, 0.5128851532936096, 0.3771097958087921, 0.3977442681789398, 0.2837379574775696, 0.5648447871208191, 0.3491117060184479, 0.4552724063396454, 0.23295961320400238, 0.2884026765823364, 0.2491338551044464, 0.29873374104499817, 0.30368494987487793, 0.27847668528556824, 0.5650433897972107, 0.3114350736141205, 0.38768649101257324, 0.31072551012039185, 0.5759497880935669, 0.3872271180152893, 0.5954270362854004, 0.5510482788085938, 0.3504422605037689, 0.5025462508201599, 0.5658548474311829, 0.5733030438423157, 0.44211599230766296, 0.32396990060806274, 0.2882205843925476, 0.20663876831531525], "labels": ["dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "creature", "dinosaur"]}, {"id": "VD_video_2_11_1_2", "boxes": [[3004, 19, 3378, 439], [2233, 1, 2424, 417], [2487, 6, 2833, 623], [1335, 224, 1446, 549], [2827, 3, 3805, 616], [3006, 60, 3589, 624], [15, 69, 3533, 623]], "scores": [0.23825080692768097, 0.24961839616298676, 0.24577069282531738, 0.2610574960708618, 0.21070365607738495, 0.2833718955516815, 0.22053691744804382], "labels": ["woman", "woman", "man", "woman", "image", "man", "image"]}, {"id": "VS_chart_2_5_2_0", "boxes": [[6, 3, 1235, 904], [125, 151, 1099, 656], [117, 563, 1105, 579], [106, 562, 1113, 579]], "scores": [0.34329715371131897, 0.4923528730869293, 0.23788383603096008, 0.25136277079582214], "labels": ["graph", "graph", "line", "line"]}, {"id": "VS_table_2_0_2_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_10_1_3", "boxes": [[275, 96, 898, 130], [124, 200, 1138, 590], [267, 221, 459, 565], [856, 412, 867, 436], [856, 477, 867, 499], [902, 476, 919, 500], [856, 541, 868, 564], [869, 540, 887, 564], [902, 540, 919, 564]], "scores": [0.3391054570674896, 0.2491690218448639, 0.2848809063434601, 0.2096831351518631, 0.23121348023414612, 0.21667537093162537, 0.22019384801387787, 0.22322295606136322, 0.21686537563800812], "labels": ["text", "text", "text", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_1_6_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_21_0_0", "boxes": [[104, 103, 414, 403], [512, 104, 824, 403], [166, 160, 349, 348], [578, 161, 758, 348]], "scores": [0.39715853333473206, 0.3883022367954254, 0.6213358640670776, 0.5878685712814331], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_2_15_1_1", "boxes": [[465, 37, 494, 72], [2, 2, 625, 449], [90, 57, 514, 397]], "scores": [0.35090410709381104, 0.23383286595344543, 0.6414721012115479], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_9_1_1", "boxes": [[20, 30, 211, 104], [9, 32, 345, 106], [183, 40, 332, 101], [8, 54, 133, 104], [236, 40, 333, 92], [160, 56, 248, 103], [108, 55, 222, 104], [288, 66, 394, 104], [231, 78, 291, 103], [10, 74, 106, 104], [254, 78, 290, 102], [127, 81, 158, 102], [172, 89, 201, 105], [212, 95, 238, 104], [293, 92, 316, 104], [379, 98, 394, 107], [8, 118, 129, 158], [32, 147, 57, 158], [112, 146, 139, 157], [134, 139, 161, 158], [127, 101, 388, 230], [6, 101, 390, 231], [8, 144, 200, 232], [132, 184, 207, 230], [277, 191, 388, 232], [189, 199, 257, 231], [266, 176, 387, 232], [25, 239, 109, 275], [103, 260, 145, 275], [147, 263, 168, 274], [245, 245, 276, 281], [174, 267, 188, 274]], "scores": [0.5466253161430359, 0.36182454228401184, 0.48941245675086975, 0.5128851532936096, 0.3771097958087921, 0.3977442681789398, 0.2837379574775696, 0.5648447871208191, 0.3491117060184479, 0.4552724063396454, 0.23295961320400238, 0.2884026765823364, 0.2491338551044464, 0.29873374104499817, 0.30368494987487793, 0.27847668528556824, 0.5650433897972107, 0.3114350736141205, 0.38768649101257324, 0.31072551012039185, 0.5759497880935669, 0.3872271180152893, 0.5954270362854004, 0.5510482788085938, 0.3504422605037689, 0.5025462508201599, 0.5658548474311829, 0.5733030438423157, 0.44211599230766296, 0.32396990060806274, 0.2882205843925476, 0.20663876831531525], "labels": ["dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "dinosaur", "creature", "dinosaur"]}, {"id": "VD_video_2_11_1_3", "boxes": [[3004, 19, 3378, 439], [2233, 1, 2424, 417], [2487, 6, 2833, 623], [1335, 224, 1446, 549], [2827, 3, 3805, 616], [3006, 60, 3589, 624], [15, 69, 3533, 623]], "scores": [0.23825080692768097, 0.24961839616298676, 0.24577069282531738, 0.2610574960708618, 0.21070365607738495, 0.2833718955516815, 0.22053691744804382], "labels": ["woman", "woman", "man", "woman", "image", "man", "image"]}, {"id": "VS_chart_2_5_2_1", "boxes": [[6, 3, 1235, 904], [125, 151, 1099, 656], [117, 563, 1105, 579], [106, 562, 1113, 579]], "scores": [0.34329715371131897, 0.4923528730869293, 0.23788383603096008, 0.25136277079582214], "labels": ["graph", "graph", "line", "line"]}, {"id": "VS_table_0_1_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_10_2_0", "boxes": [[850, 255, 867, 280], [892, 385, 903, 407], [844, 449, 855, 471], [890, 448, 907, 472], [844, 512, 855, 536], [857, 512, 875, 536], [890, 512, 907, 536]], "scores": [0.21866220235824585, 0.20290370285511017, 0.21863161027431488, 0.21919164061546326, 0.23241019248962402, 0.220402330160141, 0.21939882636070251], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_1_6_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_21_0_1", "boxes": [[104, 103, 414, 403], [512, 104, 824, 403], [166, 160, 349, 348], [578, 161, 758, 348]], "scores": [0.39715853333473206, 0.3883022367954254, 0.6213358640670776, 0.5878685712814331], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_2_15_1_2", "boxes": [[465, 37, 494, 72], [2, 2, 625, 449], [90, 57, 514, 397]], "scores": [0.35090410709381104, 0.23383286595344543, 0.6414721012115479], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_10_0_0", "boxes": [[746, 308, 792, 364], [72, 209, 553, 591], [445, 214, 774, 520], [743, 305, 903, 523], [183, 336, 543, 606], [872, 445, 1018, 587], [987, 473, 1084, 562], [1063, 461, 1145, 526], [839, 444, 1019, 646], [16, 220, 1438, 957], [655, 495, 785, 575], [510, 506, 672, 657], [798, 465, 924, 633], [946, 517, 1005, 579], [939, 544, 1036, 665], [12, 488, 245, 769], [231, 546, 519, 746], [917, 498, 1362, 870], [653, 540, 872, 760], [474, 625, 544, 680], [559, 614, 651, 696], [869, 652, 942, 717], [489, 671, 564, 749], [1397, 676, 1523, 797], [515, 698, 596, 779], [587, 694, 770, 803], [1234, 692, 1425, 863], [187, 736, 378, 837], [198, 632, 629, 921], [585, 724, 666, 788], [34, 737, 190, 889], [185, 734, 428, 942], [591, 782, 682, 861], [846, 791, 946, 905], [333, 817, 414, 881], [200, 814, 417, 942], [1276, 830, 1388, 930], [629, 837, 696, 909], [890, 854, 971, 937], [1072, 866, 1160, 953], [6, 520, 1525, 1019]], "scores": [0.255906879901886, 0.6033681631088257, 0.4980804920196533, 0.3377852737903595, 0.37864503264427185, 0.2547018229961395, 0.39745572209358215, 0.2907165288925171, 0.48974287509918213, 0.24498139321804047, 0.4825688302516937, 0.48851245641708374, 0.20432522892951965, 0.341126412153244, 0.42342594265937805, 0.4512639045715332, 0.2366195023059845, 0.25598493218421936, 0.3829788267612457, 0.41201624274253845, 0.4214572608470917, 0.4140571653842926, 0.45361843705177307, 0.28553617000579834, 0.4643978178501129, 0.2749347984790802, 0.4950968325138092, 0.23068436980247498, 0.3615223169326782, 0.33118051290512085, 0.2710534334182739, 0.32606035470962524, 0.4603195786476135, 0.37628835439682007, 0.2693576514720917, 0.27918606996536255, 0.3787443935871124, 0.46642976999282837, 0.516279935836792, 0.5063506364822388, 0.3041321337223053], "labels": ["tomato", "basket", "broccoli", "pepper", "vegetable", "broccoli", "tomato", "tomato", "broccoli", "variety", "tomato", "tomato", "pepper", "tomato", "tomato", "lettuce", "vegetable", "vegetable", "pepper", "tomato", "tomato", "tomato", "tomato", "fruit", "tomato", "vegetable", "tomato", "vegetable", "vegetable", "tomato", "fruit", "variety", "tomato", "vegetable", "vegetable", "vegetable", "tomato", "tomato", "tomato", "tomato", "table"]}, {"id": "VD_video_2_11_2_0", "boxes": [[337, 1, 536, 410], [988, 10, 1883, 620], [1116, 17, 1468, 437], [594, 2, 938, 618], [2273, 218, 2395, 548], [1114, 27, 1677, 620], [8, 7, 1007, 622], [12, 15, 1935, 626]], "scores": [0.30682092905044556, 0.2123109996318817, 0.28435784578323364, 0.254456490278244, 0.2793765068054199, 0.21176329255104065, 0.20613281428813934, 0.25858768820762634], "labels": ["woman", "reflection", "woman", "woman", "woman", "person", "reflection", "image"]}, {"id": "VS_chart_2_5_2_2", "boxes": [[6, 3, 1235, 904], [125, 151, 1099, 656], [117, 563, 1105, 579], [106, 562, 1113, 579]], "scores": [0.34329715371131897, 0.4923528730869293, 0.23788383603096008, 0.25136277079582214], "labels": ["graph", "graph", "line", "line"]}, {"id": "VS_table_0_1_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_10_2_1", "boxes": [[850, 255, 867, 280], [892, 385, 903, 407], [844, 449, 855, 471], [890, 448, 907, 472], [844, 512, 855, 536], [857, 512, 875, 536], [890, 512, 907, 536]], "scores": [0.21866220235824585, 0.20290370285511017, 0.21863161027431488, 0.21919164061546326, 0.23241019248962402, 0.220402330160141, 0.21939882636070251], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_6_2_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_21_0_2", "boxes": [[104, 103, 414, 403], [512, 104, 824, 403], [166, 160, 349, 348], [578, 161, 758, 348]], "scores": [0.39715853333473206, 0.3883022367954254, 0.6213358640670776, 0.5878685712814331], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_1_16_0_0", "boxes": [[487, 11, 518, 49], [-1, 0, 622, 442], [78, 44, 535, 405]], "scores": [0.2911333739757538, 0.22314605116844177, 0.6099278926849365], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_10_0_1", "boxes": [[746, 308, 792, 364], [72, 209, 553, 591], [445, 214, 774, 520], [743, 305, 903, 523], [183, 336, 543, 606], [872, 445, 1018, 587], [987, 473, 1084, 562], [1063, 461, 1145, 526], [839, 444, 1019, 646], [16, 220, 1438, 957], [655, 495, 785, 575], [510, 506, 672, 657], [798, 465, 924, 633], [946, 517, 1005, 579], [939, 544, 1036, 665], [12, 488, 245, 769], [231, 546, 519, 746], [917, 498, 1362, 870], [653, 540, 872, 760], [474, 625, 544, 680], [559, 614, 651, 696], [869, 652, 942, 717], [489, 671, 564, 749], [1397, 676, 1523, 797], [515, 698, 596, 779], [587, 694, 770, 803], [1234, 692, 1425, 863], [187, 736, 378, 837], [198, 632, 629, 921], [585, 724, 666, 788], [34, 737, 190, 889], [185, 734, 428, 942], [591, 782, 682, 861], [846, 791, 946, 905], [333, 817, 414, 881], [200, 814, 417, 942], [1276, 830, 1388, 930], [629, 837, 696, 909], [890, 854, 971, 937], [1072, 866, 1160, 953], [6, 520, 1525, 1019]], "scores": [0.255906879901886, 0.6033681631088257, 0.4980804920196533, 0.3377852737903595, 0.37864503264427185, 0.2547018229961395, 0.39745572209358215, 0.2907165288925171, 0.48974287509918213, 0.24498139321804047, 0.4825688302516937, 0.48851245641708374, 0.20432522892951965, 0.341126412153244, 0.42342594265937805, 0.4512639045715332, 0.2366195023059845, 0.25598493218421936, 0.3829788267612457, 0.41201624274253845, 0.4214572608470917, 0.4140571653842926, 0.45361843705177307, 0.28553617000579834, 0.4643978178501129, 0.2749347984790802, 0.4950968325138092, 0.23068436980247498, 0.3615223169326782, 0.33118051290512085, 0.2710534334182739, 0.32606035470962524, 0.4603195786476135, 0.37628835439682007, 0.2693576514720917, 0.27918606996536255, 0.3787443935871124, 0.46642976999282837, 0.516279935836792, 0.5063506364822388, 0.3041321337223053], "labels": ["tomato", "basket", "broccoli", "pepper", "vegetable", "broccoli", "tomato", "tomato", "broccoli", "variety", "tomato", "tomato", "pepper", "tomato", "tomato", "lettuce", "vegetable", "vegetable", "pepper", "tomato", "tomato", "tomato", "tomato", "fruit", "tomato", "vegetable", "tomato", "vegetable", "vegetable", "tomato", "fruit", "variety", "tomato", "vegetable", "vegetable", "vegetable", "tomato", "tomato", "tomato", "tomato", "table"]}, {"id": "VD_video_2_11_2_1", "boxes": [[337, 1, 536, 410], [988, 10, 1883, 620], [1116, 17, 1468, 437], [594, 2, 938, 618], [2273, 218, 2395, 548], [1114, 27, 1677, 620], [8, 7, 1007, 622], [12, 15, 1935, 626]], "scores": [0.30682092905044556, 0.2123109996318817, 0.28435784578323364, 0.254456490278244, 0.2793765068054199, 0.21176329255104065, 0.20613281428813934, 0.25858768820762634], "labels": ["woman", "reflection", "woman", "woman", "woman", "person", "reflection", "image"]}, {"id": "VS_chart_2_5_2_3", "boxes": [[6, 3, 1235, 904], [125, 151, 1099, 656], [117, 563, 1105, 579], [106, 562, 1113, 579]], "scores": [0.34329715371131897, 0.4923528730869293, 0.23788383603096008, 0.25136277079582214], "labels": ["graph", "graph", "line", "line"]}, {"id": "VS_table_0_1_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_10_2_2", "boxes": [[850, 255, 867, 280], [892, 385, 903, 407], [844, 449, 855, 471], [890, 448, 907, 472], [844, 512, 855, 536], [857, 512, 875, 536], [890, 512, 907, 536]], "scores": [0.21866220235824585, 0.20290370285511017, 0.21863161027431488, 0.21919164061546326, 0.23241019248962402, 0.220402330160141, 0.21939882636070251], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_2_6_2_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_21_0_3", "boxes": [[104, 103, 414, 403], [512, 104, 824, 403], [166, 160, 349, 348], [578, 161, 758, 348]], "scores": [0.39715853333473206, 0.3883022367954254, 0.6213358640670776, 0.5878685712814331], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_1_16_0_1", "boxes": [[487, 11, 518, 49], [-1, 0, 622, 442], [78, 44, 535, 405]], "scores": [0.2911333739757538, 0.22314605116844177, 0.6099278926849365], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_10_1_0", "boxes": [[242, 99, 261, 117], [23, 67, 180, 188], [145, 69, 249, 169], [242, 98, 292, 166], [61, 108, 175, 194], [273, 144, 328, 207], [345, 149, 369, 170], [319, 152, 350, 181], [213, 159, 253, 185], [6, 71, 444, 310], [166, 162, 218, 214], [307, 167, 328, 188], [304, 148, 369, 215], [212, 174, 283, 243], [304, 176, 336, 214], [385, 170, 501, 234], [4, 157, 80, 251], [76, 178, 168, 241], [155, 201, 177, 220], [182, 197, 211, 225], [158, 154, 318, 304], [158, 98, 374, 306], [280, 210, 307, 232], [300, 164, 440, 280], [159, 216, 182, 241], [61, 207, 107, 243], [65, 207, 201, 298], [192, 224, 249, 259], [400, 225, 460, 257], [453, 219, 494, 256], [167, 225, 194, 252], [61, 238, 122, 270], [190, 231, 216, 254], [12, 237, 62, 286], [113, 230, 201, 297], [192, 253, 219, 279], [61, 237, 137, 303], [274, 256, 307, 291], [110, 263, 135, 283], [65, 264, 134, 304], [203, 269, 225, 293], [289, 276, 315, 303], [348, 281, 375, 308], [397, 267, 500, 312], [0, 176, 404, 330], [430, 288, 500, 311], [1, 169, 496, 330], [107, 280, 386, 332], [396, 254, 501, 330], [399, 295, 498, 331]], "scores": [0.2200026959180832, 0.6349605321884155, 0.4713625907897949, 0.31894364953041077, 0.39415794610977173, 0.5055505633354187, 0.3439199924468994, 0.3576376140117645, 0.43389129638671875, 0.25590747594833374, 0.3912879526615143, 0.28627100586891174, 0.21190978586673737, 0.3704305589199066, 0.3449534475803375, 0.20451834797859192, 0.33034050464630127, 0.2659214735031128, 0.3803904950618744, 0.3226436674594879, 0.2068255990743637, 0.24103319644927979, 0.36087310314178467, 0.3024158477783203, 0.37819987535476685, 0.2449045032262802, 0.37656170129776, 0.30838215351104736, 0.45849180221557617, 0.3064013719558716, 0.39524951577186584, 0.2331569343805313, 0.3624066412448883, 0.3112429976463318, 0.2896551489830017, 0.42938703298568726, 0.37192413210868835, 0.39469560980796814, 0.26920294761657715, 0.2902892827987671, 0.4145178198814392, 0.4992150366306305, 0.4741063416004181, 0.5297697186470032, 0.3209788203239441, 0.21440166234970093, 0.52602618932724, 0.2673895061016083, 0.5392680764198303, 0.2123563438653946], "labels": ["tomato", "basket", "vegetable", "pepper", "vegetable", "vegetable", "tomato", "tomato", "tomato", "variety", "tomato", "tomato", "variety", "pepper", "tomato", "cutting board", "vegetable", "vegetable", "tomato", "tomato", "variety", "variety", "tomato", "vegetable", "tomato", "vegetable", "vegetable", "tomato", "tomato", "fruit", "tomato", "vegetable", "tomato", "fruit", "vegetable", "tomato", "variety", "vegetable", "vegetable", "vegetable", "tomato", "tomato", "tomato", "meat", "cutting board", "meat", "cutting board", "cutting board", "cutting board", "cutting board"]}, {"id": "VD_video_2_11_2_2", "boxes": [[337, 1, 536, 410], [988, 10, 1883, 620], [1116, 17, 1468, 437], [594, 2, 938, 618], [2273, 218, 2395, 548], [1114, 27, 1677, 620], [8, 7, 1007, 622], [12, 15, 1935, 626]], "scores": [0.30682092905044556, 0.2123109996318817, 0.28435784578323364, 0.254456490278244, 0.2793765068054199, 0.21176329255104065, 0.20613281428813934, 0.25858768820762634], "labels": ["woman", "reflection", "woman", "woman", "woman", "person", "reflection", "image"]}, {"id": "VS_chart_0_6_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_1_1_0", "boxes": [[500, 220, 517, 246], [499, 285, 517, 310], [500, 349, 517, 374], [500, 413, 517, 438], [499, 477, 517, 502]], "scores": [0.20397724211215973, 0.25723737478256226, 0.2600494623184204, 0.23917217552661896, 0.2285660207271576], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_table_2_10_2_3", "boxes": [[850, 255, 867, 280], [892, 385, 903, 407], [844, 449, 855, 471], [890, 448, 907, 472], [844, 512, 855, 536], [857, 512, 875, 536], [890, 512, 907, 536]], "scores": [0.21866220235824585, 0.20290370285511017, 0.21863161027431488, 0.21919164061546326, 0.23241019248962402, 0.220402330160141, 0.21939882636070251], "labels": ["number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_ocr_0_7_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_21_1_0", "boxes": [[88, 106, 400, 406], [497, 105, 808, 405], [150, 162, 333, 350], [562, 162, 743, 351]], "scores": [0.3929329514503479, 0.3737882971763611, 0.627541720867157, 0.6051507592201233], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_1_16_0_2", "boxes": [[487, 11, 518, 49], [-1, 0, 622, 442], [78, 44, 535, 405]], "scores": [0.2911333739757538, 0.22314605116844177, 0.6099278926849365], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_10_1_1", "boxes": [[242, 99, 261, 117], [23, 67, 180, 188], [145, 69, 249, 169], [242, 98, 292, 166], [61, 108, 175, 194], [273, 144, 328, 207], [345, 149, 369, 170], [319, 152, 350, 181], [213, 159, 253, 185], [6, 71, 444, 310], [166, 162, 218, 214], [307, 167, 328, 188], [304, 148, 369, 215], [212, 174, 283, 243], [304, 176, 336, 214], [385, 170, 501, 234], [4, 157, 80, 251], [76, 178, 168, 241], [155, 201, 177, 220], [182, 197, 211, 225], [158, 154, 318, 304], [158, 98, 374, 306], [280, 210, 307, 232], [300, 164, 440, 280], [159, 216, 182, 241], [61, 207, 107, 243], [65, 207, 201, 298], [192, 224, 249, 259], [400, 225, 460, 257], [453, 219, 494, 256], [167, 225, 194, 252], [61, 238, 122, 270], [190, 231, 216, 254], [12, 237, 62, 286], [113, 230, 201, 297], [192, 253, 219, 279], [61, 237, 137, 303], [274, 256, 307, 291], [110, 263, 135, 283], [65, 264, 134, 304], [203, 269, 225, 293], [289, 276, 315, 303], [348, 281, 375, 308], [397, 267, 500, 312], [0, 176, 404, 330], [430, 288, 500, 311], [1, 169, 496, 330], [107, 280, 386, 332], [396, 254, 501, 330], [399, 295, 498, 331]], "scores": [0.2200026959180832, 0.6349605321884155, 0.4713625907897949, 0.31894364953041077, 0.39415794610977173, 0.5055505633354187, 0.3439199924468994, 0.3576376140117645, 0.43389129638671875, 0.25590747594833374, 0.3912879526615143, 0.28627100586891174, 0.21190978586673737, 0.3704305589199066, 0.3449534475803375, 0.20451834797859192, 0.33034050464630127, 0.2659214735031128, 0.3803904950618744, 0.3226436674594879, 0.2068255990743637, 0.24103319644927979, 0.36087310314178467, 0.3024158477783203, 0.37819987535476685, 0.2449045032262802, 0.37656170129776, 0.30838215351104736, 0.45849180221557617, 0.3064013719558716, 0.39524951577186584, 0.2331569343805313, 0.3624066412448883, 0.3112429976463318, 0.2896551489830017, 0.42938703298568726, 0.37192413210868835, 0.39469560980796814, 0.26920294761657715, 0.2902892827987671, 0.4145178198814392, 0.4992150366306305, 0.4741063416004181, 0.5297697186470032, 0.3209788203239441, 0.21440166234970093, 0.52602618932724, 0.2673895061016083, 0.5392680764198303, 0.2123563438653946], "labels": ["tomato", "basket", "vegetable", "pepper", "vegetable", "vegetable", "tomato", "tomato", "tomato", "variety", "tomato", "tomato", "variety", "pepper", "tomato", "cutting board", "vegetable", "vegetable", "tomato", "tomato", "variety", "variety", "tomato", "vegetable", "tomato", "vegetable", "vegetable", "tomato", "tomato", "fruit", "tomato", "vegetable", "tomato", "fruit", "vegetable", "tomato", "variety", "vegetable", "vegetable", "vegetable", "tomato", "tomato", "tomato", "meat", "cutting board", "meat", "cutting board", "cutting board", "cutting board", "cutting board"]}, {"id": "VD_video_2_11_2_3", "boxes": [[337, 1, 536, 410], [988, 10, 1883, 620], [1116, 17, 1468, 437], [594, 2, 938, 618], [2273, 218, 2395, 548], [1114, 27, 1677, 620], [8, 7, 1007, 622], [12, 15, 1935, 626]], "scores": [0.30682092905044556, 0.2123109996318817, 0.28435784578323364, 0.254456490278244, 0.2793765068054199, 0.21176329255104065, 0.20613281428813934, 0.25858768820762634], "labels": ["woman", "reflection", "woman", "woman", "woman", "person", "reflection", "image"]}, {"id": "VS_chart_0_6_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_1_1_1", "boxes": [[500, 220, 517, 246], [499, 285, 517, 310], [500, 349, 517, 374], [500, 413, 517, 438], [499, 477, 517, 502]], "scores": [0.20397724211215973, 0.25723737478256226, 0.2600494623184204, 0.23917217552661896, 0.2285660207271576], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_map_0_0_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_7_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_21_1_1", "boxes": [[88, 106, 400, 406], [497, 105, 808, 405], [150, 162, 333, 350], [562, 162, 743, 351]], "scores": [0.3929329514503479, 0.3737882971763611, 0.627541720867157, 0.6051507592201233], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_2_16_1_0", "boxes": [[458, 6, 486, 41], [85, 24, 503, 364]], "scores": [0.3487739562988281, 0.7058389782905579], "labels": ["triangle", "triangle"]}, {"id": "VD_figure_1_11_0_0", "boxes": [[325, 0, 354, 18], [3, 4, 508, 284], [94, 45, 133, 71], [410, 40, 450, 75], [6, 11, 446, 284], [233, 12, 354, 135], [312, 63, 439, 172], [31, 50, 231, 187], [111, 47, 229, 117], [32, 92, 188, 188], [0, 109, 39, 135], [183, 97, 299, 175], [227, 11, 425, 281], [255, 152, 398, 271], [81, 162, 252, 282], [0, 159, 254, 287], [79, 217, 129, 272], [0, 239, 90, 288], [257, 153, 438, 287], [363, 246, 440, 287]], "scores": [0.42336761951446533, 0.4620301425457001, 0.252280592918396, 0.38536062836647034, 0.4125494062900543, 0.6033455729484558, 0.5363496541976929, 0.236735537648201, 0.5692335367202759, 0.5396010875701904, 0.4045335054397583, 0.5387709140777588, 0.20480112731456757, 0.5544689893722534, 0.5564536452293396, 0.26886922121047974, 0.22371967136859894, 0.42486122250556946, 0.27876344323158264, 0.4114750325679779], "labels": ["stick", "platter", "stick", "stick", "chocolate", "chocolate", "treat", "treat", "treat", "treat", "stick", "treat", "chocolate", "chocolate", "treat", "treat", "chocolate", "stick", "lollipop", "stick"]}, {"id": "VD_video_1_12_0_0", "boxes": [[277, 143, 618, 490], [2050, 142, 2392, 489], [1163, 141, 1504, 485], [2934, 140, 3278, 486], [11, 35, 3486, 626]], "scores": [0.419859915971756, 0.39436402916908264, 0.4294792115688324, 0.4149872958660126, 0.29560068249702454], "labels": ["circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_0_6_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_1_1_1_2", "boxes": [[500, 220, 517, 246], [499, 285, 517, 310], [500, 349, 517, 374], [500, 413, 517, 438], [499, 477, 517, 502]], "scores": [0.20397724211215973, 0.25723737478256226, 0.2600494623184204, 0.23917217552661896, 0.2285660207271576], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_map_0_0_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_7_1_0", "boxes": [[150, 32, 504, 185], [1, 33, 149, 180], [2, 0, 870, 531], [517, 31, 858, 180], [10, 111, 858, 398], [5, 190, 871, 333], [413, 183, 774, 334], [2, 187, 404, 333], [282, 336, 543, 482], [546, 337, 877, 484], [3, 336, 280, 482], [98, 483, 385, 538], [385, 484, 670, 538], [667, 483, 878, 537]], "scores": [0.3147154748439789, 0.22697299718856812, 0.6487982869148254, 0.28165963292121887, 0.29441604018211365, 0.23844864964485168, 0.31082040071487427, 0.4043145477771759, 0.3776925802230835, 0.405740350484848, 0.3590713441371918, 0.35852810740470886, 0.357906311750412, 0.2950196862220764], "labels": ["stone", "stone", "stone building", "stone", "writing", "plaque", "plaque", "plaque", "stone", "stone", "stone", "stone", "stone", "stone"]}, {"id": "VD_illusion_2_21_1_2", "boxes": [[88, 106, 400, 406], [497, 105, 808, 405], [150, 162, 333, 350], [562, 162, 743, 351]], "scores": [0.3929329514503479, 0.3737882971763611, 0.627541720867157, 0.6051507592201233], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_2_16_1_1", "boxes": [[458, 6, 486, 41], [85, 24, 503, 364]], "scores": [0.3487739562988281, 0.7058389782905579], "labels": ["triangle", "triangle"]}, {"id": "VD_figure_1_11_0_1", "boxes": [[325, 0, 354, 18], [3, 4, 508, 284], [94, 45, 133, 71], [410, 40, 450, 75], [6, 11, 446, 284], [233, 12, 354, 135], [312, 63, 439, 172], [31, 50, 231, 187], [111, 47, 229, 117], [32, 92, 188, 188], [0, 109, 39, 135], [183, 97, 299, 175], [227, 11, 425, 281], [255, 152, 398, 271], [81, 162, 252, 282], [0, 159, 254, 287], [79, 217, 129, 272], [0, 239, 90, 288], [257, 153, 438, 287], [363, 246, 440, 287]], "scores": [0.42336761951446533, 0.4620301425457001, 0.252280592918396, 0.38536062836647034, 0.4125494062900543, 0.6033455729484558, 0.5363496541976929, 0.236735537648201, 0.5692335367202759, 0.5396010875701904, 0.4045335054397583, 0.5387709140777588, 0.20480112731456757, 0.5544689893722534, 0.5564536452293396, 0.26886922121047974, 0.22371967136859894, 0.42486122250556946, 0.27876344323158264, 0.4114750325679779], "labels": ["stick", "platter", "stick", "stick", "chocolate", "chocolate", "treat", "treat", "treat", "treat", "stick", "treat", "chocolate", "chocolate", "treat", "treat", "chocolate", "stick", "lollipop", "stick"]}, {"id": "VD_video_1_12_0_1", "boxes": [[277, 143, 618, 490], [2050, 142, 2392, 489], [1163, 141, 1504, 485], [2934, 140, 3278, 486], [11, 35, 3486, 626]], "scores": [0.419859915971756, 0.39436402916908264, 0.4294792115688324, 0.4149872958660126, 0.29560068249702454], "labels": ["circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_0_6_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_1_2_0", "boxes": [[230, 53, 687, 87], [558, 231, 576, 257], [79, 138, 780, 544], [557, 296, 575, 321], [113, 160, 385, 521], [557, 363, 575, 389], [558, 429, 575, 454], [557, 496, 575, 522]], "scores": [0.27645859122276306, 0.2287260890007019, 0.24852722883224487, 0.2534462511539459, 0.23788847029209137, 0.2870651185512543, 0.24783815443515778, 0.2619992196559906], "labels": ["text", "number", "text", "number", "text", "number", "number", "number"]}, {"id": "VS_map_0_0_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_7_1_1", "boxes": [[150, 32, 504, 185], [1, 33, 149, 180], [2, 0, 870, 531], [517, 31, 858, 180], [10, 111, 858, 398], [5, 190, 871, 333], [413, 183, 774, 334], [2, 187, 404, 333], [282, 336, 543, 482], [546, 337, 877, 484], [3, 336, 280, 482], [98, 483, 385, 538], [385, 484, 670, 538], [667, 483, 878, 537]], "scores": [0.3147154748439789, 0.22697299718856812, 0.6487982869148254, 0.28165963292121887, 0.29441604018211365, 0.23844864964485168, 0.31082040071487427, 0.4043145477771759, 0.3776925802230835, 0.405740350484848, 0.3590713441371918, 0.35852810740470886, 0.357906311750412, 0.2950196862220764], "labels": ["stone", "stone", "stone building", "stone", "writing", "plaque", "plaque", "plaque", "stone", "stone", "stone", "stone", "stone", "stone"]}, {"id": "VD_illusion_2_21_1_3", "boxes": [[88, 106, 400, 406], [497, 105, 808, 405], [150, 162, 333, 350], [562, 162, 743, 351]], "scores": [0.3929329514503479, 0.3737882971763611, 0.627541720867157, 0.6051507592201233], "labels": ["square", "square", "circle", "circle"]}, {"id": "VD_math_2_16_1_2", "boxes": [[458, 6, 486, 41], [85, 24, 503, 364]], "scores": [0.3487739562988281, 0.7058389782905579], "labels": ["triangle", "triangle"]}, {"id": "VD_figure_2_11_1_0", "boxes": [[331, 0, 359, 18], [15, 19, 33, 37], [25, 15, 57, 38], [15, 20, 41, 47], [20, 33, 40, 47], [5, 4, 514, 289], [317, 42, 469, 278], [102, 46, 133, 70], [415, 41, 455, 75], [10, 10, 72, 74], [5, 10, 450, 288], [113, 48, 233, 120], [237, 11, 359, 137], [316, 63, 444, 173], [35, 51, 234, 190], [186, 98, 303, 176], [0, 112, 38, 137], [36, 93, 189, 191], [59, 98, 295, 286], [236, 10, 438, 285], [258, 153, 400, 273], [84, 163, 256, 285], [2, 100, 284, 290], [0, 162, 255, 290], [81, 218, 128, 273], [1, 241, 91, 291], [261, 156, 448, 291], [256, 49, 458, 293], [368, 249, 447, 290]], "scores": [0.4218183159828186, 0.2047184705734253, 0.3183385133743286, 0.2776392102241516, 0.2521999180316925, 0.41799724102020264, 0.258478045463562, 0.24926616251468658, 0.41959989070892334, 0.2838291823863983, 0.35811159014701843, 0.5840131044387817, 0.5782139897346497, 0.5363598465919495, 0.2894122898578644, 0.5529283285140991, 0.42832809686660767, 0.5663236379623413, 0.2738245725631714, 0.2001977562904358, 0.5374676585197449, 0.5316109657287598, 0.20928449928760529, 0.32419446110725403, 0.2196035534143448, 0.4458579123020172, 0.22883892059326172, 0.2046220749616623, 0.4521254301071167], "labels": ["stick", "cherry", "topping", "cherry", "cherry", "platter", "stick", "stick", "stick", "dessert", "chocolate", "treat", "chocolate", "treat", "treat", "treat", "stick", "treat", "treat", "chocolate", "chocolate", "treat", "treat", "treat", "chocolate", "stick", "lollipop", "stick", "stick"]}, {"id": "VD_video_1_12_0_2", "boxes": [[277, 143, 618, 490], [2050, 142, 2392, 489], [1163, 141, 1504, 485], [2934, 140, 3278, 486], [11, 35, 3486, 626]], "scores": [0.419859915971756, 0.39436402916908264, 0.4294792115688324, 0.4149872958660126, 0.29560068249702454], "labels": ["circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_1_6_1_0", "boxes": [[-1, 4, 732, 551], [106, 116, 727, 491], [8, 113, 730, 542]], "scores": [0.38329699635505676, 0.44365230202674866, 0.23826031386852264], "labels": ["screenshot", "graph", "graph"]}, {"id": "VS_table_2_1_2_1", "boxes": [[230, 53, 687, 87], [558, 231, 576, 257], [79, 138, 780, 544], [557, 296, 575, 321], [113, 160, 385, 521], [557, 363, 575, 389], [558, 429, 575, 454], [557, 496, 575, 522]], "scores": [0.27645859122276306, 0.2287260890007019, 0.24852722883224487, 0.2534462511539459, 0.23788847029209137, 0.2870651185512543, 0.24783815443515778, 0.2619992196559906], "labels": ["text", "number", "text", "number", "text", "number", "number", "number"]}, {"id": "VS_map_0_0_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_2_7_2_0", "boxes": [[170, 0, 468, 40], [210, 37, 729, 258], [0, 36, 209, 252], [753, 36, 1248, 257], [13, 269, 1262, 479], [2, 5, 1262, 757], [4, 263, 588, 476], [9, 150, 1251, 567], [598, 258, 1131, 478], [910, 493, 1210, 596], [409, 479, 783, 692], [1, 479, 400, 693], [790, 478, 1273, 692], [974, 695, 1276, 771], [144, 699, 552, 775], [562, 698, 973, 774]], "scores": [0.20903745293617249, 0.32492244243621826, 0.26523834466934204, 0.28249362111091614, 0.2562359571456909, 0.4983612596988678, 0.33915871381759644, 0.2702161967754364, 0.25471174716949463, 0.31106242537498474, 0.318705290555954, 0.36895492672920227, 0.4290233552455902, 0.28053298592567444, 0.3337510824203491, 0.3619234561920166], "labels": ["stone", "stone", "stone", "stone", "plaque", "wall", "plaque", "writing", "plaque", "plaque", "stone", "stone", "stone", "stone", "stone", "stone"]}, {"id": "VD_illusion_1_22_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_17_0_0", "boxes": [[273, 23, 305, 62], [0, 3, 635, 561], [83, 88, 560, 453]], "scores": [0.2724394202232361, 0.31459176540374756, 0.693804144859314], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_11_1_1", "boxes": [[331, 0, 359, 18], [15, 19, 33, 37], [25, 15, 57, 38], [15, 20, 41, 47], [20, 33, 40, 47], [5, 4, 514, 289], [317, 42, 469, 278], [102, 46, 133, 70], [415, 41, 455, 75], [10, 10, 72, 74], [5, 10, 450, 288], [113, 48, 233, 120], [237, 11, 359, 137], [316, 63, 444, 173], [35, 51, 234, 190], [186, 98, 303, 176], [0, 112, 38, 137], [36, 93, 189, 191], [59, 98, 295, 286], [236, 10, 438, 285], [258, 153, 400, 273], [84, 163, 256, 285], [2, 100, 284, 290], [0, 162, 255, 290], [81, 218, 128, 273], [1, 241, 91, 291], [261, 156, 448, 291], [256, 49, 458, 293], [368, 249, 447, 290]], "scores": [0.4218183159828186, 0.2047184705734253, 0.3183385133743286, 0.2776392102241516, 0.2521999180316925, 0.41799724102020264, 0.258478045463562, 0.24926616251468658, 0.41959989070892334, 0.2838291823863983, 0.35811159014701843, 0.5840131044387817, 0.5782139897346497, 0.5363598465919495, 0.2894122898578644, 0.5529283285140991, 0.42832809686660767, 0.5663236379623413, 0.2738245725631714, 0.2001977562904358, 0.5374676585197449, 0.5316109657287598, 0.20928449928760529, 0.32419446110725403, 0.2196035534143448, 0.4458579123020172, 0.22883892059326172, 0.2046220749616623, 0.4521254301071167], "labels": ["stick", "cherry", "topping", "cherry", "cherry", "platter", "stick", "stick", "stick", "dessert", "chocolate", "treat", "chocolate", "treat", "treat", "treat", "stick", "treat", "treat", "chocolate", "chocolate", "treat", "treat", "treat", "chocolate", "stick", "lollipop", "stick", "stick"]}, {"id": "VD_video_2_12_1_0", "boxes": [[2055, 143, 2392, 485], [277, 141, 617, 484], [1164, 141, 1508, 485], [2939, 142, 3278, 486], [-2, 3, 895, 618], [-4, 32, 3462, 619]], "scores": [0.432278573513031, 0.449470192193985, 0.4266761243343353, 0.40964147448539734, 0.21650752425193787, 0.29598507285118103], "labels": ["circle", "circle", "circle", "circle", "color", "color"]}, {"id": "VS_chart_1_6_1_1", "boxes": [[-1, 4, 732, 551], [106, 116, 727, 491], [8, 113, 730, 542]], "scores": [0.38329699635505676, 0.44365230202674866, 0.23826031386852264], "labels": ["screenshot", "graph", "graph"]}, {"id": "VS_table_2_1_2_2", "boxes": [[230, 53, 687, 87], [558, 231, 576, 257], [79, 138, 780, 544], [557, 296, 575, 321], [113, 160, 385, 521], [557, 363, 575, 389], [558, 429, 575, 454], [557, 496, 575, 522]], "scores": [0.27645859122276306, 0.2287260890007019, 0.24852722883224487, 0.2534462511539459, 0.23788847029209137, 0.2870651185512543, 0.24783815443515778, 0.2619992196559906], "labels": ["text", "number", "text", "number", "text", "number", "number", "number"]}, {"id": "VS_map_1_0_1_0", "boxes": [[6, 4, 728, 497], [34, 26, 705, 451]], "scores": [0.49390557408332825, 0.70521479845047], "labels": ["map", "map"]}, {"id": "VS_ocr_2_7_2_1", "boxes": [[170, 0, 468, 40], [210, 37, 729, 258], [0, 36, 209, 252], [753, 36, 1248, 257], [13, 269, 1262, 479], [2, 5, 1262, 757], [4, 263, 588, 476], [9, 150, 1251, 567], [598, 258, 1131, 478], [910, 493, 1210, 596], [409, 479, 783, 692], [1, 479, 400, 693], [790, 478, 1273, 692], [974, 695, 1276, 771], [144, 699, 552, 775], [562, 698, 973, 774]], "scores": [0.20903745293617249, 0.32492244243621826, 0.26523834466934204, 0.28249362111091614, 0.2562359571456909, 0.4983612596988678, 0.33915871381759644, 0.2702161967754364, 0.25471174716949463, 0.31106242537498474, 0.318705290555954, 0.36895492672920227, 0.4290233552455902, 0.28053298592567444, 0.3337510824203491, 0.3619234561920166], "labels": ["stone", "stone", "stone", "stone", "plaque", "wall", "plaque", "writing", "plaque", "plaque", "stone", "stone", "stone", "stone", "stone", "stone"]}, {"id": "VD_illusion_2_22_1_0", "boxes": [[38, 114, 182, 257], [343, 407, 570, 641]], "scores": [0.20124724507331848, 0.22871838510036469], "labels": ["point", "line"]}, {"id": "VD_math_1_17_0_1", "boxes": [[273, 23, 305, 62], [0, 3, 635, 561], [83, 88, 560, 453]], "scores": [0.2724394202232361, 0.31459176540374756, 0.693804144859314], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_12_0_0", "boxes": [[117, 5, 220, 231], [199, 42, 308, 236], [298, 34, 402, 234], [26, 62, 145, 231]], "scores": [0.7333469986915588, 0.7771403789520264, 0.7669375538825989, 0.7251868844032288], "labels": ["penguin", "penguin", "penguin", "penguin"]}, {"id": "VD_video_2_12_1_1", "boxes": [[2055, 143, 2392, 485], [277, 141, 617, 484], [1164, 141, 1508, 485], [2939, 142, 3278, 486], [-2, 3, 895, 618], [-4, 32, 3462, 619]], "scores": [0.432278573513031, 0.449470192193985, 0.4266761243343353, 0.40964147448539734, 0.21650752425193787, 0.29598507285118103], "labels": ["circle", "circle", "circle", "circle", "color", "color"]}, {"id": "VS_chart_1_6_1_2", "boxes": [[-1, 4, 732, 551], [106, 116, 727, 491], [8, 113, 730, 542]], "scores": [0.38329699635505676, 0.44365230202674866, 0.23826031386852264], "labels": ["screenshot", "graph", "graph"]}, {"id": "VS_table_0_2_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_0_1_1", "boxes": [[6, 4, 728, 497], [34, 26, 705, 451]], "scores": [0.49390557408332825, 0.70521479845047], "labels": ["map", "map"]}, {"id": "VS_ocr_0_8_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_23_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_1_17_0_2", "boxes": [[273, 23, 305, 62], [0, 3, 635, 561], [83, 88, 560, 453]], "scores": [0.2724394202232361, 0.31459176540374756, 0.693804144859314], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_12_0_1", "boxes": [[117, 5, 220, 231], [199, 42, 308, 236], [298, 34, 402, 234], [26, 62, 145, 231]], "scores": [0.7333469986915588, 0.7771403789520264, 0.7669375538825989, 0.7251868844032288], "labels": ["penguin", "penguin", "penguin", "penguin"]}, {"id": "VD_video_2_12_1_2", "boxes": [[2055, 143, 2392, 485], [277, 141, 617, 484], [1164, 141, 1508, 485], [2939, 142, 3278, 486], [-2, 3, 895, 618], [-4, 32, 3462, 619]], "scores": [0.432278573513031, 0.449470192193985, 0.4266761243343353, 0.40964147448539734, 0.21650752425193787, 0.29598507285118103], "labels": ["circle", "circle", "circle", "circle", "color", "color"]}, {"id": "VS_chart_1_6_1_3", "boxes": [[-1, 4, 732, 551], [106, 116, 727, 491], [8, 113, 730, 542]], "scores": [0.38329699635505676, 0.44365230202674866, 0.23826031386852264], "labels": ["screenshot", "graph", "graph"]}, {"id": "VS_table_0_2_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_0_1_2", "boxes": [[6, 4, 728, 497], [34, 26, 705, 451]], "scores": [0.49390557408332825, 0.70521479845047], "labels": ["map", "map"]}, {"id": "VS_ocr_0_8_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_23_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_17_1_0", "boxes": [[283, 6, 315, 45], [3, 0, 669, 518], [96, 71, 568, 429]], "scores": [0.31307488679885864, 0.32115814089775085, 0.6870630383491516], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_12_1_0", "boxes": [[306, 43, 405, 244], [122, 13, 226, 242], [405, 83, 532, 212], [204, 50, 315, 246], [28, 71, 147, 241]], "scores": [0.7251916527748108, 0.7160181403160095, 0.7455797791481018, 0.7475457191467285, 0.7042621970176697], "labels": ["penguin", "penguin", "penguin", "penguin", "penguin"]}, {"id": "VD_video_2_12_2_0", "boxes": [[278, 141, 619, 486], [330, 196, 561, 429], [1163, 139, 1508, 482], [1918, 192, 2257, 436], [2676, 138, 3020, 481], [13, 29, 3251, 628]], "scores": [0.4150009751319885, 0.20021377503871918, 0.4307868778705597, 0.38185742497444153, 0.42657163739204407, 0.28616371750831604], "labels": ["circle", "circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_2_6_2_0", "boxes": [[2, 5, 830, 621], [83, 128, 818, 552]], "scores": [0.4061880111694336, 0.47050604224205017], "labels": ["graph", "graph"]}, {"id": "VS_table_0_2_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_0_1_3", "boxes": [[6, 4, 728, 497], [34, 26, 705, 451]], "scores": [0.49390557408332825, 0.70521479845047], "labels": ["map", "map"]}, {"id": "VS_ocr_1_8_1_0", "boxes": [[1, 15, 1150, 202], [2, 9, 1155, 221]], "scores": [0.2845337390899658, 0.6380904912948608], "labels": ["text", "text"]}, {"id": "VD_illusion_2_23_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_17_1_1", "boxes": [[283, 6, 315, 45], [3, 0, 669, 518], [96, 71, 568, 429]], "scores": [0.31307488679885864, 0.32115814089775085, 0.6870630383491516], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_2_12_1_1", "boxes": [[306, 43, 405, 244], [122, 13, 226, 242], [405, 83, 532, 212], [204, 50, 315, 246], [28, 71, 147, 241]], "scores": [0.7251916527748108, 0.7160181403160095, 0.7455797791481018, 0.7475457191467285, 0.7042621970176697], "labels": ["penguin", "penguin", "penguin", "penguin", "penguin"]}, {"id": "VD_video_2_12_2_1", "boxes": [[278, 141, 619, 486], [330, 196, 561, 429], [1163, 139, 1508, 482], [1918, 192, 2257, 436], [2676, 138, 3020, 481], [13, 29, 3251, 628]], "scores": [0.4150009751319885, 0.20021377503871918, 0.4307868778705597, 0.38185742497444153, 0.42657163739204407, 0.28616371750831604], "labels": ["circle", "circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_2_6_2_1", "boxes": [[2, 5, 830, 621], [83, 128, 818, 552]], "scores": [0.4061880111694336, 0.47050604224205017], "labels": ["graph", "graph"]}, {"id": "VS_table_1_2_1_0", "boxes": [[407, 243, 424, 268], [407, 314, 425, 339], [434, 314, 451, 339], [409, 385, 421, 409], [435, 385, 451, 410], [409, 456, 420, 480], [434, 456, 451, 481], [407, 527, 426, 552], [434, 527, 451, 552]], "scores": [0.2380235344171524, 0.2485368549823761, 0.21799875795841217, 0.2541126608848572, 0.21534277498722076, 0.244172140955925, 0.2876318693161011, 0.2525359094142914, 0.20944522321224213], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_0_2_0", "boxes": [[5, 3, 728, 501], [34, 26, 705, 451]], "scores": [0.483321875333786, 0.7031587958335876], "labels": ["map", "map"]}, {"id": "VS_ocr_1_8_1_1", "boxes": [[1, 15, 1150, 202], [2, 9, 1155, 221]], "scores": [0.2845337390899658, 0.6380904912948608], "labels": ["text", "text"]}, {"id": "VD_illusion_2_23_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_math_2_17_1_2", "boxes": [[283, 6, 315, 45], [3, 0, 669, 518], [96, 71, 568, 429]], "scores": [0.31307488679885864, 0.32115814089775085, 0.6870630383491516], "labels": ["triangle", "angle", "triangle"]}, {"id": "VD_figure_1_13_0_0", "boxes": [[289, 22, 347, 85], [68, 34, 258, 114], [4, 5, 374, 559], [225, 126, 302, 228], [66, 146, 156, 228], [198, 194, 249, 264], [246, 219, 315, 292], [305, 221, 382, 320], [269, 269, 362, 352], [34, 326, 146, 511], [3, 160, 375, 564], [97, 374, 163, 479], [1, 436, 110, 567], [97, 453, 189, 569], [158, 453, 313, 568]], "scores": [0.34134915471076965, 0.36770710349082947, 0.5644269585609436, 0.24817459285259247, 0.24754853546619415, 0.23306246101856232, 0.21776695549488068, 0.2605619728565216, 0.20831118524074554, 0.2039327323436737, 0.5054113864898682, 0.20562221109867096, 0.2791654169559479, 0.24468110501766205, 0.2731187343597412], "labels": ["poster", "street sign", "animation film", "animal", "street sign", "animal", "animal", "animal", "animal", "animation film", "animation film", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_2_12_2_2", "boxes": [[278, 141, 619, 486], [330, 196, 561, 429], [1163, 139, 1508, 482], [1918, 192, 2257, 436], [2676, 138, 3020, 481], [13, 29, 3251, 628]], "scores": [0.4150009751319885, 0.20021377503871918, 0.4307868778705597, 0.38185742497444153, 0.42657163739204407, 0.28616371750831604], "labels": ["circle", "circle", "circle", "circle", "circle", "color"]}, {"id": "VS_chart_2_6_2_2", "boxes": [[2, 5, 830, 621], [83, 128, 818, 552]], "scores": [0.4061880111694336, 0.47050604224205017], "labels": ["graph", "graph"]}, {"id": "VS_table_1_2_1_1", "boxes": [[407, 243, 424, 268], [407, 314, 425, 339], [434, 314, 451, 339], [409, 385, 421, 409], [435, 385, 451, 410], [409, 456, 420, 480], [434, 456, 451, 481], [407, 527, 426, 552], [434, 527, 451, 552]], "scores": [0.2380235344171524, 0.2485368549823761, 0.21799875795841217, 0.2541126608848572, 0.21534277498722076, 0.244172140955925, 0.2876318693161011, 0.2525359094142914, 0.20944522321224213], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_0_2_1", "boxes": [[5, 3, 728, 501], [34, 26, 705, 451]], "scores": [0.483321875333786, 0.7031587958335876], "labels": ["map", "map"]}, {"id": "VS_ocr_2_8_2_0", "boxes": [[2, 18, 1158, 209], [0, 14, 1182, 227], [6, 3, 1202, 239]], "scores": [0.31478631496429443, 0.65068519115448, 0.3200303316116333], "labels": ["text", "text", "quote"]}, {"id": "VD_illusion_1_24_0_0", "boxes": [[135, 135, 226, 227], [548, 135, 638, 227]], "scores": [0.5813615918159485, 0.5968838334083557], "labels": ["circle", "circle"]}, {"id": "VD_ocr_1_0_0_0", "boxes": [[8, 4, 588, 453], [4, -1, 590, 934], [9, 411, 590, 927], [4, 473, 593, 869], [10, 510, 543, 788]], "scores": [0.49427488446235657, 0.32445237040519714, 0.2865132689476013, 0.3953462541103363, 0.7074630856513977], "labels": ["menu", "menu", "cuisine", "cuisine", "spaghetti"]}, {"id": "VD_figure_1_13_0_1", "boxes": [[289, 22, 347, 85], [68, 34, 258, 114], [4, 5, 374, 559], [225, 126, 302, 228], [66, 146, 156, 228], [198, 194, 249, 264], [246, 219, 315, 292], [305, 221, 382, 320], [269, 269, 362, 352], [34, 326, 146, 511], [3, 160, 375, 564], [97, 374, 163, 479], [1, 436, 110, 567], [97, 453, 189, 569], [158, 453, 313, 568]], "scores": [0.34134915471076965, 0.36770710349082947, 0.5644269585609436, 0.24817459285259247, 0.24754853546619415, 0.23306246101856232, 0.21776695549488068, 0.2605619728565216, 0.20831118524074554, 0.2039327323436737, 0.5054113864898682, 0.20562221109867096, 0.2791654169559479, 0.24468110501766205, 0.2731187343597412], "labels": ["poster", "street sign", "animation film", "animal", "street sign", "animal", "animal", "animal", "animal", "animation film", "animation film", "animation film", "animal", "animal", "animal"]}, {"id": "VD_video_1_13_0_0", "boxes": [[1250, 0, 1424, 217], [451, 0, 708, 238], [2396, 95, 2639, 256], [2399, 92, 2647, 324], [240, 95, 569, 428], [-73, 3, 2689, 582], [1690, 112, 1949, 347], [294, 210, 513, 468], [1697, 202, 1937, 537], [2134, 85, 2830, 549], [713, 1, 1392, 562], [1525, 112, 2006, 564]], "scores": [0.22799675166606903, 0.2761491537094116, 0.22318622469902039, 0.2501506209373474, 0.20784991979599, 0.27650323510169983, 0.2580249011516571, 0.21225306391716003, 0.3142754137516022, 0.22050800919532776, 0.2054722011089325, 0.2305491864681244], "labels": ["face", "face", "hair", "hair", "wig", "selfie", "hair", "face", "face", "man", "selfie", "man"]}, {"id": "VS_chart_2_6_2_3", "boxes": [[2, 5, 830, 621], [83, 128, 818, 552]], "scores": [0.4061880111694336, 0.47050604224205017], "labels": ["graph", "graph"]}, {"id": "VS_table_1_2_1_2", "boxes": [[407, 243, 424, 268], [407, 314, 425, 339], [434, 314, 451, 339], [409, 385, 421, 409], [435, 385, 451, 410], [409, 456, 420, 480], [434, 456, 451, 481], [407, 527, 426, 552], [434, 527, 451, 552]], "scores": [0.2380235344171524, 0.2485368549823761, 0.21799875795841217, 0.2541126608848572, 0.21534277498722076, 0.244172140955925, 0.2876318693161011, 0.2525359094142914, 0.20944522321224213], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_0_2_2", "boxes": [[5, 3, 728, 501], [34, 26, 705, 451]], "scores": [0.483321875333786, 0.7031587958335876], "labels": ["map", "map"]}, {"id": "VS_ocr_2_8_2_1", "boxes": [[2, 18, 1158, 209], [0, 14, 1182, 227], [6, 3, 1202, 239]], "scores": [0.31478631496429443, 0.65068519115448, 0.3200303316116333], "labels": ["text", "text", "quote"]}, {"id": "VD_illusion_1_24_0_1", "boxes": [[135, 135, 226, 227], [548, 135, 638, 227]], "scores": [0.5813615918159485, 0.5968838334083557], "labels": ["circle", "circle"]}, {"id": "VD_ocr_1_0_0_1", "boxes": [[8, 4, 588, 453], [4, -1, 590, 934], [9, 411, 590, 927], [4, 473, 593, 869], [10, 510, 543, 788]], "scores": [0.49427488446235657, 0.32445237040519714, 0.2865132689476013, 0.3953462541103363, 0.7074630856513977], "labels": ["menu", "menu", "cuisine", "cuisine", "spaghetti"]}, {"id": "VD_figure_2_13_1_0", "boxes": [[288, 20, 345, 86], [68, 34, 258, 112], [312, 24, 382, 227], [222, 124, 295, 229], [250, 24, 325, 237], [66, 144, 156, 226], [197, 192, 251, 263], [306, 218, 382, 320], [42, 251, 75, 280], [137, 267, 200, 365], [167, 249, 286, 410], [29, 294, 75, 345], [222, 264, 381, 552], [24, 325, 91, 450], [38, 333, 131, 507], [2, 156, 374, 563], [153, 382, 219, 483], [0, 440, 110, 567], [96, 453, 189, 569], [158, 453, 313, 569]], "scores": [0.3347932994365692, 0.39177921414375305, 0.2513512074947357, 0.21732160449028015, 0.21407581865787506, 0.25060737133026123, 0.21415427327156067, 0.20790724456310272, 0.23020301759243011, 0.2554944157600403, 0.23387601971626282, 0.2266296148300171, 0.25202664732933044, 0.2304951250553131, 0.24814482033252716, 0.23874109983444214, 0.2625992000102997, 0.2519952952861786, 0.2864549160003662, 0.24783499538898468], "labels": ["poster", "street sign", "toy", "animal", "toy", "street sign", "animal", "animal", "toy", "toy", "toy", "animal", "toy", "toy", "toy", "toy", "toy", "toy", "toy", "animal"]}, {"id": "VD_video_1_13_0_1", "boxes": [[1250, 0, 1424, 217], [451, 0, 708, 238], [2396, 95, 2639, 256], [2399, 92, 2647, 324], [240, 95, 569, 428], [-73, 3, 2689, 582], [1690, 112, 1949, 347], [294, 210, 513, 468], [1697, 202, 1937, 537], [2134, 85, 2830, 549], [713, 1, 1392, 562], [1525, 112, 2006, 564]], "scores": [0.22799675166606903, 0.2761491537094116, 0.22318622469902039, 0.2501506209373474, 0.20784991979599, 0.27650323510169983, 0.2580249011516571, 0.21225306391716003, 0.3142754137516022, 0.22050800919532776, 0.2054722011089325, 0.2305491864681244], "labels": ["face", "face", "hair", "hair", "wig", "selfie", "hair", "face", "face", "man", "selfie", "man"]}, {"id": "VS_chart_0_7_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_2_2_0", "boxes": [[415, 241, 426, 265], [440, 241, 457, 266], [413, 312, 430, 336], [440, 312, 457, 337], [415, 383, 426, 407], [440, 383, 458, 408], [415, 454, 426, 478], [440, 454, 457, 479], [413, 525, 431, 550], [440, 525, 457, 550]], "scores": [0.25710880756378174, 0.23044687509536743, 0.2642923891544342, 0.21997293829917908, 0.28054627776145935, 0.2417355328798294, 0.261202335357666, 0.2784087061882019, 0.24861662089824677, 0.23863954842090607], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_0_2_3", "boxes": [[5, 3, 728, 501], [34, 26, 705, 451]], "scores": [0.483321875333786, 0.7031587958335876], "labels": ["map", "map"]}, {"id": "VS_ocr_0_9_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_24_1_0", "boxes": [[123, 131, 214, 223], [536, 131, 627, 223]], "scores": [0.5713327527046204, 0.5867918133735657], "labels": ["circle", "circle"]}, {"id": "VD_ocr_2_0_1_0", "boxes": [[7, 4, 588, 462], [3, 3, 591, 953], [6, 425, 592, 945], [-2, 484, 589, 890], [96, 516, 136, 569], [5, 522, 546, 807], [107, 586, 205, 696]], "scores": [0.48929691314697266, 0.3356163203716278, 0.286493718624115, 0.39337143301963806, 0.20905078947544098, 0.48899880051612854, 0.202846959233284], "labels": ["menu", "menu", "cuisine", "cuisine", "noodle", "pasta", "noodle"]}, {"id": "VD_figure_2_13_1_1", "boxes": [[288, 20, 345, 86], [68, 34, 258, 112], [312, 24, 382, 227], [222, 124, 295, 229], [250, 24, 325, 237], [66, 144, 156, 226], [197, 192, 251, 263], [306, 218, 382, 320], [42, 251, 75, 280], [137, 267, 200, 365], [167, 249, 286, 410], [29, 294, 75, 345], [222, 264, 381, 552], [24, 325, 91, 450], [38, 333, 131, 507], [2, 156, 374, 563], [153, 382, 219, 483], [0, 440, 110, 567], [96, 453, 189, 569], [158, 453, 313, 569]], "scores": [0.3347932994365692, 0.39177921414375305, 0.2513512074947357, 0.21732160449028015, 0.21407581865787506, 0.25060737133026123, 0.21415427327156067, 0.20790724456310272, 0.23020301759243011, 0.2554944157600403, 0.23387601971626282, 0.2266296148300171, 0.25202664732933044, 0.2304951250553131, 0.24814482033252716, 0.23874109983444214, 0.2625992000102997, 0.2519952952861786, 0.2864549160003662, 0.24783499538898468], "labels": ["poster", "street sign", "toy", "animal", "toy", "street sign", "animal", "animal", "toy", "toy", "toy", "animal", "toy", "toy", "toy", "toy", "toy", "toy", "toy", "animal"]}, {"id": "VD_video_1_13_0_2", "boxes": [[1250, 0, 1424, 217], [451, 0, 708, 238], [2396, 95, 2639, 256], [2399, 92, 2647, 324], [240, 95, 569, 428], [-73, 3, 2689, 582], [1690, 112, 1949, 347], [294, 210, 513, 468], [1697, 202, 1937, 537], [2134, 85, 2830, 549], [713, 1, 1392, 562], [1525, 112, 2006, 564]], "scores": [0.22799675166606903, 0.2761491537094116, 0.22318622469902039, 0.2501506209373474, 0.20784991979599, 0.27650323510169983, 0.2580249011516571, 0.21225306391716003, 0.3142754137516022, 0.22050800919532776, 0.2054722011089325, 0.2305491864681244], "labels": ["face", "face", "hair", "hair", "wig", "selfie", "hair", "face", "face", "man", "selfie", "man"]}, {"id": "VS_chart_0_7_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_2_2_1", "boxes": [[415, 241, 426, 265], [440, 241, 457, 266], [413, 312, 430, 336], [440, 312, 457, 337], [415, 383, 426, 407], [440, 383, 458, 408], [415, 454, 426, 478], [440, 454, 457, 479], [413, 525, 431, 550], [440, 525, 457, 550]], "scores": [0.25710880756378174, 0.23044687509536743, 0.2642923891544342, 0.21997293829917908, 0.28054627776145935, 0.2417355328798294, 0.261202335357666, 0.2784087061882019, 0.24861662089824677, 0.23863954842090607], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_1_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_9_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_24_1_1", "boxes": [[123, 131, 214, 223], [536, 131, 627, 223]], "scores": [0.5713327527046204, 0.5867918133735657], "labels": ["circle", "circle"]}, {"id": "VD_ocr_2_0_1_1", "boxes": [[7, 4, 588, 462], [3, 3, 591, 953], [6, 425, 592, 945], [-2, 484, 589, 890], [96, 516, 136, 569], [5, 522, 546, 807], [107, 586, 205, 696]], "scores": [0.48929691314697266, 0.3356163203716278, 0.286493718624115, 0.39337143301963806, 0.20905078947544098, 0.48899880051612854, 0.202846959233284], "labels": ["menu", "menu", "cuisine", "cuisine", "noodle", "pasta", "noodle"]}, {"id": "VD_figure_1_14_0_0", "boxes": [[100, 0, 228, 86], [221, -1, 343, 114], [293, 0, 485, 111], [8, 2, 495, 306], [306, 80, 487, 195], [13, 31, 243, 241], [301, 88, 493, 302], [329, 181, 487, 303], [289, 212, 343, 254], [150, 166, 332, 306], [278, 181, 354, 256], [28, 171, 213, 307], [279, 180, 355, 302]], "scores": [0.29639750719070435, 0.31781697273254395, 0.22364702820777893, 0.5093066692352295, 0.28964558243751526, 0.3647957146167755, 0.21604639291763306, 0.27430596947669983, 0.23997998237609863, 0.2822154760360718, 0.3493918776512146, 0.23056906461715698, 0.34025701880455017], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_1_14_0_0", "boxes": [[1840, 328, 2219, 471], [2906, 342, 3355, 468]], "scores": [0.3935706913471222, 0.44860631227493286], "labels": ["hurdle", "hurdle"]}, {"id": "VS_chart_0_7_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_2_2_2", "boxes": [[415, 241, 426, 265], [440, 241, 457, 266], [413, 312, 430, 336], [440, 312, 457, 337], [415, 383, 426, 407], [440, 383, 458, 408], [415, 454, 426, 478], [440, 454, 457, 479], [413, 525, 431, 550], [440, 525, 457, 550]], "scores": [0.25710880756378174, 0.23044687509536743, 0.2642923891544342, 0.21997293829917908, 0.28054627776145935, 0.2417355328798294, 0.261202335357666, 0.2784087061882019, 0.24861662089824677, 0.23863954842090607], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_1_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_9_1_0", "boxes": [[4, 13, 1168, 74], [-1, 10, 1189, 220], [-4, 44, 1182, 203], [5, 99, 1187, 202]], "scores": [0.3046632409095764, 0.6721110343933105, 0.2186744660139084, 0.28241220116615295], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_25_0_0", "boxes": [[48, 137, 228, 321], [51, 135, 470, 318], [351, 169, 468, 288], [94, 183, 183, 275], [368, 183, 456, 274]], "scores": [0.7038182020187378, 0.25262653827667236, 0.6828789710998535, 0.4344685673713684, 0.35520806908607483], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_1_1_0_0", "boxes": [[9, 7, 649, 684], [43, 295, 623, 602], [208, 267, 582, 459], [356, 265, 601, 434], [92, 342, 247, 442], [130, 265, 611, 551], [164, 380, 444, 515], [88, 263, 618, 584]], "scores": [0.257739782333374, 0.34642910957336426, 0.3648207485675812, 0.2226540744304657, 0.22715207934379578, 0.3350938856601715, 0.35485604405403137, 0.2812788784503937], "labels": ["poster", "cuisine", "pork", "pork", "vegetable", "pork", "pork", "cuisine"]}, {"id": "VD_figure_1_14_0_1", "boxes": [[100, 0, 228, 86], [221, -1, 343, 114], [293, 0, 485, 111], [8, 2, 495, 306], [306, 80, 487, 195], [13, 31, 243, 241], [301, 88, 493, 302], [329, 181, 487, 303], [289, 212, 343, 254], [150, 166, 332, 306], [278, 181, 354, 256], [28, 171, 213, 307], [279, 180, 355, 302]], "scores": [0.29639750719070435, 0.31781697273254395, 0.22364702820777893, 0.5093066692352295, 0.28964558243751526, 0.3647957146167755, 0.21604639291763306, 0.27430596947669983, 0.23997998237609863, 0.2822154760360718, 0.3493918776512146, 0.23056906461715698, 0.34025701880455017], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VD_video_1_14_0_1", "boxes": [[1840, 328, 2219, 471], [2906, 342, 3355, 468]], "scores": [0.3935706913471222, 0.44860631227493286], "labels": ["hurdle", "hurdle"]}, {"id": "VS_chart_0_7_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_2_3_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_1_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_9_1_1", "boxes": [[4, 13, 1168, 74], [-1, 10, 1189, 220], [-4, 44, 1182, 203], [5, 99, 1187, 202]], "scores": [0.3046632409095764, 0.6721110343933105, 0.2186744660139084, 0.28241220116615295], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_25_0_1", "boxes": [[48, 137, 228, 321], [51, 135, 470, 318], [351, 169, 468, 288], [94, 183, 183, 275], [368, 183, 456, 274]], "scores": [0.7038182020187378, 0.25262653827667236, 0.6828789710998535, 0.4344685673713684, 0.35520806908607483], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_1_1_0_1", "boxes": [[9, 7, 649, 684], [43, 295, 623, 602], [208, 267, 582, 459], [356, 265, 601, 434], [92, 342, 247, 442], [130, 265, 611, 551], [164, 380, 444, 515], [88, 263, 618, 584]], "scores": [0.257739782333374, 0.34642910957336426, 0.3648207485675812, 0.2226540744304657, 0.22715207934379578, 0.3350938856601715, 0.35485604405403137, 0.2812788784503937], "labels": ["poster", "cuisine", "pork", "pork", "vegetable", "pork", "pork", "cuisine"]}, {"id": "VD_figure_2_14_1_0", "boxes": [[26, 0, 226, 97], [102, 0, 229, 88], [223, 0, 345, 113], [231, 0, 374, 144], [11, 30, 244, 244], [4, 3, 498, 308], [308, 92, 497, 215], [280, 183, 357, 255], [294, 214, 340, 253], [151, 167, 332, 308], [280, 183, 354, 305], [26, 172, 212, 308], [374, 214, 475, 308]], "scores": [0.22740858793258667, 0.28374770283699036, 0.31449660658836365, 0.25284647941589355, 0.3886539041996002, 0.5217195749282837, 0.363695353269577, 0.32528358697891235, 0.22658808529376984, 0.29621726274490356, 0.38586583733558655, 0.3952859342098236, 0.40174832940101624], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon character", "anime", "cartoon", "anime", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "anime", "anime"]}, {"id": "VD_video_1_14_0_2", "boxes": [[1840, 328, 2219, 471], [2906, 342, 3355, 468]], "scores": [0.3935706913471222, 0.44860631227493286], "labels": ["hurdle", "hurdle"]}, {"id": "VS_chart_1_7_1_0", "boxes": [[5, -1, 804, 724], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34652528166770935, 0.21476122736930847, 0.4846596121788025, 0.294402152299881, 0.42742180824279785], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_2_2_3_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_1_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_2_9_2_0", "boxes": [[0, 10, 1165, 73], [-3, 9, 1202, 221], [-2, 20, 1186, 204], [1, 74, 1193, 204]], "scores": [0.30672138929367065, 0.6377802491188049, 0.21674297749996185, 0.3480106294155121], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_25_0_2", "boxes": [[48, 137, 228, 321], [51, 135, 470, 318], [351, 169, 468, 288], [94, 183, 183, 275], [368, 183, 456, 274]], "scores": [0.7038182020187378, 0.25262653827667236, 0.6828789710998535, 0.4344685673713684, 0.35520806908607483], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_2_1_1_0", "boxes": [[10, 11, 665, 706], [43, 308, 639, 621], [208, 275, 611, 471], [134, 275, 624, 560], [93, 355, 250, 458], [92, 275, 631, 599], [167, 390, 458, 534]], "scores": [0.2822532057762146, 0.36279091238975525, 0.3613778054714203, 0.3932521641254425, 0.23767241835594177, 0.24905063211917877, 0.30303260684013367], "labels": ["poster", "cuisine", "pork", "pork", "vegetable", "cuisine", "pork"]}, {"id": "VD_figure_2_14_1_1", "boxes": [[26, 0, 226, 97], [102, 0, 229, 88], [223, 0, 345, 113], [231, 0, 374, 144], [11, 30, 244, 244], [4, 3, 498, 308], [308, 92, 497, 215], [280, 183, 357, 255], [294, 214, 340, 253], [151, 167, 332, 308], [280, 183, 354, 305], [26, 172, 212, 308], [374, 214, 475, 308]], "scores": [0.22740858793258667, 0.28374770283699036, 0.31449660658836365, 0.25284647941589355, 0.3886539041996002, 0.5217195749282837, 0.363695353269577, 0.32528358697891235, 0.22658808529376984, 0.29621726274490356, 0.38586583733558655, 0.3952859342098236, 0.40174832940101624], "labels": ["cartoon character", "cartoon character", "cartoon character", "cartoon character", "anime", "cartoon", "anime", "cartoon character", "cartoon character", "cartoon character", "cartoon character", "anime", "anime"]}, {"id": "VD_video_1_14_0_3", "boxes": [[1840, 328, 2219, 471], [2906, 342, 3355, 468]], "scores": [0.3935706913471222, 0.44860631227493286], "labels": ["hurdle", "hurdle"]}, {"id": "VS_chart_1_7_1_1", "boxes": [[5, -1, 804, 724], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34652528166770935, 0.21476122736930847, 0.4846596121788025, 0.294402152299881, 0.42742180824279785], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_2_2_3_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_1_1_0", "boxes": [[1, 1, 507, 290], [9, 23, 502, 281]], "scores": [0.5257891416549683, 0.3900397717952728], "labels": ["map", "map"]}, {"id": "VS_ocr_2_9_2_1", "boxes": [[0, 10, 1165, 73], [-3, 9, 1202, 221], [-2, 20, 1186, 204], [1, 74, 1193, 204]], "scores": [0.30672138929367065, 0.6377802491188049, 0.21674297749996185, 0.3480106294155121], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_2_25_1_0", "boxes": [[30, 58, 212, 243], [34, 58, 458, 239], [335, 89, 452, 210], [351, 104, 441, 196], [59, 85, 186, 215]], "scores": [0.6625072360038757, 0.2235184907913208, 0.6538572311401367, 0.2972352206707001, 0.35931214690208435], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_2_1_1_1", "boxes": [[10, 11, 665, 706], [43, 308, 639, 621], [208, 275, 611, 471], [134, 275, 624, 560], [93, 355, 250, 458], [92, 275, 631, 599], [167, 390, 458, 534]], "scores": [0.2822532057762146, 0.36279091238975525, 0.3613778054714203, 0.3932521641254425, 0.23767241835594177, 0.24905063211917877, 0.30303260684013367], "labels": ["poster", "cuisine", "pork", "pork", "vegetable", "cuisine", "pork"]}, {"id": "VD_figure_1_15_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_video_2_14_1_0", "boxes": [[915, 328, 1291, 471], [313, 280, 759, 438], [160, 341, 605, 468], [1345, 340, 1790, 484], [3093, 411, 3514, 516]], "scores": [0.42970067262649536, 0.31089168787002563, 0.4713747799396515, 0.243068128824234, 0.23857800662517548], "labels": ["hurdle", "jump", "hurdle", "jump", "jump"]}, {"id": "VS_chart_1_7_1_2", "boxes": [[5, -1, 804, 724], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34652528166770935, 0.21476122736930847, 0.4846596121788025, 0.294402152299881, 0.42742180824279785], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_0_3_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_1_1_1", "boxes": [[1, 1, 507, 290], [9, 23, 502, 281]], "scores": [0.5257891416549683, 0.3900397717952728], "labels": ["map", "map"]}, {"id": "VS_ocr_0_10_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_25_1_1", "boxes": [[30, 58, 212, 243], [34, 58, 458, 239], [335, 89, 452, 210], [351, 104, 441, 196], [59, 85, 186, 215]], "scores": [0.6625072360038757, 0.2235184907913208, 0.6538572311401367, 0.2972352206707001, 0.35931214690208435], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_1_2_0_0", "boxes": [[4, -2, 1182, 1756], [2, 27, 1182, 1331], [34, 275, 1170, 1658]], "scores": [0.606389582157135, 0.21316029131412506, 0.3383658230304718], "labels": ["movie poster", "rain", "actor"]}, {"id": "VD_figure_1_15_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_video_2_14_1_1", "boxes": [[915, 328, 1291, 471], [313, 280, 759, 438], [160, 341, 605, 468], [1345, 340, 1790, 484], [3093, 411, 3514, 516]], "scores": [0.42970067262649536, 0.31089168787002563, 0.4713747799396515, 0.243068128824234, 0.23857800662517548], "labels": ["hurdle", "jump", "hurdle", "jump", "jump"]}, {"id": "VS_chart_1_7_1_3", "boxes": [[5, -1, 804, 724], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34652528166770935, 0.21476122736930847, 0.4846596121788025, 0.294402152299881, 0.42742180824279785], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_0_3_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_1_1_2", "boxes": [[1, 1, 507, 290], [9, 23, 502, 281]], "scores": [0.5257891416549683, 0.3900397717952728], "labels": ["map", "map"]}, {"id": "VS_ocr_0_10_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_25_1_2", "boxes": [[30, 58, 212, 243], [34, 58, 458, 239], [335, 89, 452, 210], [351, 104, 441, 196], [59, 85, 186, 215]], "scores": [0.6625072360038757, 0.2235184907913208, 0.6538572311401367, 0.2972352206707001, 0.35931214690208435], "labels": ["circle", "circle", "circle", "circle", "circle"]}, {"id": "VD_ocr_1_2_0_1", "boxes": [[4, -2, 1182, 1756], [2, 27, 1182, 1331], [34, 275, 1170, 1658]], "scores": [0.606389582157135, 0.21316029131412506, 0.3383658230304718], "labels": ["movie poster", "rain", "actor"]}, {"id": "VD_figure_2_15_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_video_2_14_1_2", "boxes": [[915, 328, 1291, 471], [313, 280, 759, 438], [160, 341, 605, 468], [1345, 340, 1790, 484], [3093, 411, 3514, 516]], "scores": [0.42970067262649536, 0.31089168787002563, 0.4713747799396515, 0.243068128824234, 0.23857800662517548], "labels": ["hurdle", "jump", "hurdle", "jump", "jump"]}, {"id": "VS_chart_2_7_2_0", "boxes": [[5, -1, 803, 723], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34629711508750916, 0.21593037247657776, 0.4843893349170685, 0.29381710290908813, 0.4253985285758972], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_0_3_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_1_1_3", "boxes": [[1, 1, 507, 290], [9, 23, 502, 281]], "scores": [0.5257891416549683, 0.3900397717952728], "labels": ["map", "map"]}, {"id": "VS_ocr_1_10_1_0", "boxes": [[4, 12, 1158, 172], [-4, 1, 1167, 421], [-1, 8, 1159, 397], [4, 5, 707, 406]], "scores": [0.24035625159740448, 0.6293388605117798, 0.4048070013523102, 0.2135564386844635], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_26_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_2_2_1_0", "boxes": [[4, 3, 489, 730], [13, 108, 484, 558]], "scores": [0.6712089776992798, 0.30431249737739563], "labels": ["movie poster", "actor"]}, {"id": "VD_figure_2_15_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_video_2_14_1_3", "boxes": [[915, 328, 1291, 471], [313, 280, 759, 438], [160, 341, 605, 468], [1345, 340, 1790, 484], [3093, 411, 3514, 516]], "scores": [0.42970067262649536, 0.31089168787002563, 0.4713747799396515, 0.243068128824234, 0.23857800662517548], "labels": ["hurdle", "jump", "hurdle", "jump", "jump"]}, {"id": "VS_chart_2_7_2_1", "boxes": [[5, -1, 803, 723], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34629711508750916, 0.21593037247657776, 0.4843893349170685, 0.29381710290908813, 0.4253985285758972], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_1_3_1_0", "boxes": [[8, 4, 1008, 28], [-1, 1, 1023, 524], [3, 94, 1027, 491]], "scores": [0.3069758713245392, 0.3725549280643463, 0.24243246018886566], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_2_1_2_0", "boxes": [[4, 10, 1519, 886], [28, 75, 1492, 859]], "scores": [0.6458103656768799, 0.3041074573993683], "labels": ["map", "map"]}, {"id": "VS_ocr_1_10_1_1", "boxes": [[4, 12, 1158, 172], [-4, 1, 1167, 421], [-1, 8, 1159, 397], [4, 5, 707, 406]], "scores": [0.24035625159740448, 0.6293388605117798, 0.4048070013523102, 0.2135564386844635], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_2_26_1_0", "boxes": [[144, 142, 201, 400]], "scores": [0.22277940809726715], "labels": ["line"]}, {"id": "VD_ocr_2_2_1_1", "boxes": [[4, 3, 489, 730], [13, 108, 484, 558]], "scores": [0.6712089776992798, 0.30431249737739563], "labels": ["movie poster", "actor"]}, {"id": "VD_figure_1_16_0_0", "boxes": [[3, 8, 498, 503], [248, 114, 309, 213], [145, 137, 301, 298], [60, 203, 121, 378], [288, 171, 459, 404], [8, 322, 91, 442], [102, 376, 144, 417], [363, 362, 486, 477], [116, 393, 138, 414], [205, 381, 338, 483]], "scores": [0.2537347972393036, 0.5876095294952393, 0.33236533403396606, 0.30126962065696716, 0.26407843828201294, 0.24794569611549377, 0.33139240741729736, 0.3488277792930603, 0.2487235963344574, 0.33784380555152893], "labels": ["cartoon", "shovel", "cartoon character", "shovel", "cartoon character", "cartoon character", "shovel", "bug", "shovel", "bug"]}, {"id": "VD_video_2_14_2_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_7_2_2", "boxes": [[5, -1, 803, 723], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34629711508750916, 0.21593037247657776, 0.4843893349170685, 0.29381710290908813, 0.4253985285758972], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_1_3_1_1", "boxes": [[8, 4, 1008, 28], [-1, 1, 1023, 524], [3, 94, 1027, 491]], "scores": [0.3069758713245392, 0.3725549280643463, 0.24243246018886566], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_2_1_2_1", "boxes": [[4, 10, 1519, 886], [28, 75, 1492, 859]], "scores": [0.6458103656768799, 0.3041074573993683], "labels": ["map", "map"]}, {"id": "VS_ocr_2_10_2_0", "boxes": [[1, 9, 1187, 425], [0, 23, 1167, 402]], "scores": [0.6411426663398743, 0.44063031673431396], "labels": ["text", "text"]}, {"id": "VD_illusion_1_27_0_0", "boxes": [[91, 86, 513, 192], [450, 88, 507, 189], [130, 275, 177, 374], [136, 322, 464, 328], [131, 275, 473, 381]], "scores": [0.3417420983314514, 0.21618907153606415, 0.20396170020103455, 0.21478867530822754, 0.44869211316108704], "labels": ["arrow", "point", "point", "line", "arrow"]}, {"id": "VD_ocr_1_3_0_0", "boxes": [[25, 19, 476, 466], [348, 46, 455, 150], [33, 198, 455, 467], [28, 201, 308, 467], [257, 195, 457, 408], [159, 203, 310, 408], [250, 197, 459, 467], [352, 334, 452, 466], [236, 333, 455, 468]], "scores": [0.4073571562767029, 0.2610180675983429, 0.41321122646331787, 0.48627132177352905, 0.5956416130065918, 0.24889317154884338, 0.28978702425956726, 0.24160149693489075, 0.4779573380947113], "labels": ["package", "cheese", "pizza", "slice", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_1_16_0_1", "boxes": [[3, 8, 498, 503], [248, 114, 309, 213], [145, 137, 301, 298], [60, 203, 121, 378], [288, 171, 459, 404], [8, 322, 91, 442], [102, 376, 144, 417], [363, 362, 486, 477], [116, 393, 138, 414], [205, 381, 338, 483]], "scores": [0.2537347972393036, 0.5876095294952393, 0.33236533403396606, 0.30126962065696716, 0.26407843828201294, 0.24794569611549377, 0.33139240741729736, 0.3488277792930603, 0.2487235963344574, 0.33784380555152893], "labels": ["cartoon", "shovel", "cartoon character", "shovel", "cartoon character", "cartoon character", "shovel", "bug", "shovel", "bug"]}, {"id": "VD_video_2_14_2_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_2_7_2_3", "boxes": [[5, -1, 803, 723], [74, 296, 792, 595], [90, 305, 792, 598], [60, 588, 803, 602], [2, 299, 793, 730]], "scores": [0.34629711508750916, 0.21593037247657776, 0.4843893349170685, 0.29381710290908813, 0.4253985285758972], "labels": ["graph", "graph", "graph", "line", "graph"]}, {"id": "VS_table_1_3_1_2", "boxes": [[8, 4, 1008, 28], [-1, 1, 1023, 524], [3, 94, 1027, 491]], "scores": [0.3069758713245392, 0.3725549280643463, 0.24243246018886566], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_2_1_2_2", "boxes": [[4, 10, 1519, 886], [28, 75, 1492, 859]], "scores": [0.6458103656768799, 0.3041074573993683], "labels": ["map", "map"]}, {"id": "VS_ocr_2_10_2_1", "boxes": [[1, 9, 1187, 425], [0, 23, 1167, 402]], "scores": [0.6411426663398743, 0.44063031673431396], "labels": ["text", "text"]}, {"id": "VD_illusion_1_27_0_1", "boxes": [[91, 86, 513, 192], [450, 88, 507, 189], [130, 275, 177, 374], [136, 322, 464, 328], [131, 275, 473, 381]], "scores": [0.3417420983314514, 0.21618907153606415, 0.20396170020103455, 0.21478867530822754, 0.44869211316108704], "labels": ["arrow", "point", "point", "line", "arrow"]}, {"id": "VD_ocr_1_3_0_1", "boxes": [[25, 19, 476, 466], [348, 46, 455, 150], [33, 198, 455, 467], [28, 201, 308, 467], [257, 195, 457, 408], [159, 203, 310, 408], [250, 197, 459, 467], [352, 334, 452, 466], [236, 333, 455, 468]], "scores": [0.4073571562767029, 0.2610180675983429, 0.41321122646331787, 0.48627132177352905, 0.5956416130065918, 0.24889317154884338, 0.28978702425956726, 0.24160149693489075, 0.4779573380947113], "labels": ["package", "cheese", "pizza", "slice", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_2_16_1_0", "boxes": [[3, 7, 499, 498], [248, 108, 309, 206], [145, 130, 301, 292], [60, 194, 122, 370], [294, 164, 461, 397], [1, 290, 114, 442], [104, 371, 138, 408], [363, 356, 486, 471]], "scores": [0.2728753387928009, 0.5243037939071655, 0.3084728419780731, 0.2678658366203308, 0.24098575115203857, 0.2756417393684387, 0.31301870942115784, 0.2257263958454132], "labels": ["cartoon", "shovel", "cartoon character", "shovel", "cartoon character", "cartoon character", "shovel", "cartoon character"]}, {"id": "VD_video_2_14_2_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_8_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_3_2_0", "boxes": [[277, 8, 840, 312], [15, -5, 1036, 519], [162, -3, 850, 521]], "scores": [0.2857162356376648, 0.32288116216659546, 0.3879697620868683], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_2_1_2_3", "boxes": [[4, 10, 1519, 886], [28, 75, 1492, 859]], "scores": [0.6458103656768799, 0.3041074573993683], "labels": ["map", "map"]}, {"id": "VS_ocr_0_11_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_27_0_2", "boxes": [[91, 86, 513, 192], [450, 88, 507, 189], [130, 275, 177, 374], [136, 322, 464, 328], [131, 275, 473, 381]], "scores": [0.3417420983314514, 0.21618907153606415, 0.20396170020103455, 0.21478867530822754, 0.44869211316108704], "labels": ["arrow", "point", "point", "line", "arrow"]}, {"id": "VD_ocr_1_3_0_2", "boxes": [[25, 19, 476, 466], [348, 46, 455, 150], [33, 198, 455, 467], [28, 201, 308, 467], [257, 195, 457, 408], [159, 203, 310, 408], [250, 197, 459, 467], [352, 334, 452, 466], [236, 333, 455, 468]], "scores": [0.4073571562767029, 0.2610180675983429, 0.41321122646331787, 0.48627132177352905, 0.5956416130065918, 0.24889317154884338, 0.28978702425956726, 0.24160149693489075, 0.4779573380947113], "labels": ["package", "cheese", "pizza", "slice", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_2_16_1_1", "boxes": [[3, 7, 499, 498], [248, 108, 309, 206], [145, 130, 301, 292], [60, 194, 122, 370], [294, 164, 461, 397], [1, 290, 114, 442], [104, 371, 138, 408], [363, 356, 486, 471]], "scores": [0.2728753387928009, 0.5243037939071655, 0.3084728419780731, 0.2678658366203308, 0.24098575115203857, 0.2756417393684387, 0.31301870942115784, 0.2257263958454132], "labels": ["cartoon", "shovel", "cartoon character", "shovel", "cartoon character", "cartoon character", "shovel", "cartoon character"]}, {"id": "VD_video_2_14_2_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_chart_0_8_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_3_2_1", "boxes": [[277, 8, 840, 312], [15, -5, 1036, 519], [162, -3, 850, 521]], "scores": [0.2857162356376648, 0.32288116216659546, 0.3879697620868683], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_0_2_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_11_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_27_1_0", "boxes": [[53, 61, 473, 168], [418, 63, 471, 166], [158, 248, 436, 354]], "scores": [0.3491692841053009, 0.2244429886341095, 0.47845926880836487], "labels": ["arrow", "point", "arrow"]}, {"id": "VD_ocr_2_3_1_0", "boxes": [[528, 43, 585, 176], [455, 42, 585, 177], [33, 10, 617, 590], [45, 238, 592, 589], [335, 237, 599, 512], [42, 243, 402, 589], [204, 253, 390, 509], [461, 421, 588, 589], [301, 418, 590, 590]], "scores": [0.2216046005487442, 0.3455922603607178, 0.2734379768371582, 0.42187169194221497, 0.6248536705970764, 0.4715602397918701, 0.20876388251781464, 0.26432546973228455, 0.5241595506668091], "labels": ["cheese", "cheese", "recipe", "pizza", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_1_17_0_0", "boxes": [[78, 4, 393, 240], [56, 0, 218, 92], [265, 33, 332, 157], [-1, 3, 385, 392], [60, 66, 201, 201], [9, 3, 378, 393], [67, 199, 202, 322], [196, 275, 366, 377], [27, 255, 92, 395], [102, 302, 202, 369]], "scores": [0.29864412546157837, 0.2820635139942169, 0.20082291960716248, 0.40888500213623047, 0.28679028153419495, 0.3379197418689728, 0.2525760233402252, 0.33275994658470154, 0.36439424753189087, 0.2184717059135437], "labels": ["basket", "chicken wing", "chicken wing", "basket", "chicken", "tray", "chicken", "chicken", "chicken", "chicken"]}, {"id": "VD_video_1_15_0_0", "boxes": [[557, 47, 595, 160], [1528, 44, 1561, 173], [2499, 45, 2528, 179], [3468, 47, 3498, 171]], "scores": [0.34606674313545227, 0.314288467168808, 0.31171390414237976, 0.2967776656150818], "labels": ["spear", "spear", "spear", "spear"]}, {"id": "VS_chart_0_8_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_3_2_2", "boxes": [[277, 8, 840, 312], [15, -5, 1036, 519], [162, -3, 850, 521]], "scores": [0.2857162356376648, 0.32288116216659546, 0.3879697620868683], "labels": ["text", "screenshot", "text"]}, {"id": "VS_map_0_2_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_11_1_0", "boxes": [[8, 15, 1182, 322], [1, 15, 1193, 423], [4, -2, 1217, 443], [17, 342, 1132, 425]], "scores": [0.39645466208457947, 0.5506275296211243, 0.29215896129608154, 0.2594415545463562], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_2_27_1_1", "boxes": [[53, 61, 473, 168], [418, 63, 471, 166], [158, 248, 436, 354]], "scores": [0.3491692841053009, 0.2244429886341095, 0.47845926880836487], "labels": ["arrow", "point", "arrow"]}, {"id": "VD_ocr_2_3_1_1", "boxes": [[528, 43, 585, 176], [455, 42, 585, 177], [33, 10, 617, 590], [45, 238, 592, 589], [335, 237, 599, 512], [42, 243, 402, 589], [204, 253, 390, 509], [461, 421, 588, 589], [301, 418, 590, 590]], "scores": [0.2216046005487442, 0.3455922603607178, 0.2734379768371582, 0.42187169194221497, 0.6248536705970764, 0.4715602397918701, 0.20876388251781464, 0.26432546973228455, 0.5241595506668091], "labels": ["cheese", "cheese", "recipe", "pizza", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_1_17_0_1", "boxes": [[78, 4, 393, 240], [56, 0, 218, 92], [265, 33, 332, 157], [-1, 3, 385, 392], [60, 66, 201, 201], [9, 3, 378, 393], [67, 199, 202, 322], [196, 275, 366, 377], [27, 255, 92, 395], [102, 302, 202, 369]], "scores": [0.29864412546157837, 0.2820635139942169, 0.20082291960716248, 0.40888500213623047, 0.28679028153419495, 0.3379197418689728, 0.2525760233402252, 0.33275994658470154, 0.36439424753189087, 0.2184717059135437], "labels": ["basket", "chicken wing", "chicken wing", "basket", "chicken", "tray", "chicken", "chicken", "chicken", "chicken"]}, {"id": "VD_video_1_15_0_1", "boxes": [[557, 47, 595, 160], [1528, 44, 1561, 173], [2499, 45, 2528, 179], [3468, 47, 3498, 171]], "scores": [0.34606674313545227, 0.314288467168808, 0.31171390414237976, 0.2967776656150818], "labels": ["spear", "spear", "spear", "spear"]}, {"id": "VS_chart_1_8_1_0", "boxes": [[0, 49, 779, 564], [704, 104, 712, 116], [239, 95, 723, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.3781140446662903, 0.2574912905693054, 0.3500686287879944, 0.24211366474628448, 0.23417788743972778, 0.22674663364887238, 0.22186169028282166, 0.2279987335205078, 0.24450133740901947, 0.20099739730358124, 0.23038563132286072, 0.2195906937122345, 0.2204270213842392, 0.21523097157478333, 0.2281801849603653], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_0_4_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_2_1_0", "boxes": [[4, 1, 506, 296], [19, 28, 484, 265], [68, 35, 461, 231], [122, 242, 409, 268]], "scores": [0.2520885467529297, 0.6064925193786621, 0.2561507821083069, 0.38790348172187805], "labels": ["map", "earth", "continent", "continent"]}, {"id": "VS_ocr_1_11_1_1", "boxes": [[8, 15, 1182, 322], [1, 15, 1193, 423], [4, -2, 1217, 443], [17, 342, 1132, 425]], "scores": [0.39645466208457947, 0.5506275296211243, 0.29215896129608154, 0.2594415545463562], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_2_27_1_2", "boxes": [[53, 61, 473, 168], [418, 63, 471, 166], [158, 248, 436, 354]], "scores": [0.3491692841053009, 0.2244429886341095, 0.47845926880836487], "labels": ["arrow", "point", "arrow"]}, {"id": "VD_ocr_2_3_1_2", "boxes": [[528, 43, 585, 176], [455, 42, 585, 177], [33, 10, 617, 590], [45, 238, 592, 589], [335, 237, 599, 512], [42, 243, 402, 589], [204, 253, 390, 509], [461, 421, 588, 589], [301, 418, 590, 590]], "scores": [0.2216046005487442, 0.3455922603607178, 0.2734379768371582, 0.42187169194221497, 0.6248536705970764, 0.4715602397918701, 0.20876388251781464, 0.26432546973228455, 0.5241595506668091], "labels": ["cheese", "cheese", "recipe", "pizza", "slice", "slice", "slice", "slice", "slice"]}, {"id": "VD_figure_2_17_1_0", "boxes": [[54, 3, 394, 242], [0, 3, 391, 394], [255, 2, 398, 222], [56, 0, 221, 92], [22, 2, 378, 248], [267, 34, 337, 165], [-2, 3, 386, 391], [59, 67, 200, 201], [210, 79, 291, 246], [180, 144, 241, 246], [8, 3, 381, 392], [68, 201, 179, 321], [164, 242, 401, 396], [25, 249, 166, 396], [26, 257, 92, 397], [176, 248, 398, 394], [2, 196, 389, 395]], "scores": [0.383517861366272, 0.2920360863208771, 0.32338884472846985, 0.33179596066474915, 0.27992597222328186, 0.23633833229541779, 0.34496814012527466, 0.32648909091949463, 0.23711448907852173, 0.26646965742111206, 0.324724406003952, 0.24688880145549774, 0.2055177092552185, 0.2369009256362915, 0.4449394941329956, 0.6628013849258423, 0.2521461248397827], "labels": ["basket", "tray", "basket", "chicken", "chicken", "chicken wing", "basket", "chicken", "chicken", "chicken wing", "tray", "chicken", "tray", "chicken", "chicken", "cat", "basket"]}, {"id": "VD_video_2_15_1_0", "boxes": [[580, 0, 609, 110], [1529, 0, 1559, 123], [2470, -1, 2503, 115], [3413, 0, 3452, 102]], "scores": [0.2919716238975525, 0.2769654393196106, 0.266452431678772, 0.3296511769294739], "labels": ["spear", "spear", "spear", "spear"]}, {"id": "VS_chart_1_8_1_1", "boxes": [[0, 49, 779, 564], [704, 104, 712, 116], [239, 95, 723, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.3781140446662903, 0.2574912905693054, 0.3500686287879944, 0.24211366474628448, 0.23417788743972778, 0.22674663364887238, 0.22186169028282166, 0.2279987335205078, 0.24450133740901947, 0.20099739730358124, 0.23038563132286072, 0.2195906937122345, 0.2204270213842392, 0.21523097157478333, 0.2281801849603653], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_0_4_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_2_1_1", "boxes": [[4, 1, 506, 296], [19, 28, 484, 265], [68, 35, 461, 231], [122, 242, 409, 268]], "scores": [0.2520885467529297, 0.6064925193786621, 0.2561507821083069, 0.38790348172187805], "labels": ["map", "earth", "continent", "continent"]}, {"id": "VS_ocr_2_11_2_0", "boxes": [[9, 25, 1193, 341], [2, 25, 1199, 433], [4, -3, 1214, 450], [10, 353, 1133, 434]], "scores": [0.4454263746738434, 0.598825216293335, 0.2737369239330292, 0.2568037509918213], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_28_0_0", "boxes": [[54, 71, 476, 176], [387, 258, 434, 359], [95, 256, 437, 364], [97, 305, 425, 312]], "scores": [0.321946918964386, 0.2224312424659729, 0.47958987951278687, 0.2105226367712021], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_2_3_2_0", "boxes": [[510, 47, 564, 177], [434, 47, 565, 183], [8, 10, 598, 597], [15, 248, 376, 595], [314, 243, 580, 519], [21, 240, 570, 592], [289, 420, 568, 595]], "scores": [0.2625885605812073, 0.3688904941082001, 0.4077540338039398, 0.5160791277885437, 0.6407093405723572, 0.3972286880016327, 0.524898111820221], "labels": ["cheese", "cheese", "package", "slice", "slice", "pizza", "slice"]}, {"id": "VD_figure_2_17_1_1", "boxes": [[54, 3, 394, 242], [0, 3, 391, 394], [255, 2, 398, 222], [56, 0, 221, 92], [22, 2, 378, 248], [267, 34, 337, 165], [-2, 3, 386, 391], [59, 67, 200, 201], [210, 79, 291, 246], [180, 144, 241, 246], [8, 3, 381, 392], [68, 201, 179, 321], [164, 242, 401, 396], [25, 249, 166, 396], [26, 257, 92, 397], [176, 248, 398, 394], [2, 196, 389, 395]], "scores": [0.383517861366272, 0.2920360863208771, 0.32338884472846985, 0.33179596066474915, 0.27992597222328186, 0.23633833229541779, 0.34496814012527466, 0.32648909091949463, 0.23711448907852173, 0.26646965742111206, 0.324724406003952, 0.24688880145549774, 0.2055177092552185, 0.2369009256362915, 0.4449394941329956, 0.6628013849258423, 0.2521461248397827], "labels": ["basket", "tray", "basket", "chicken", "chicken", "chicken wing", "basket", "chicken", "chicken", "chicken wing", "tray", "chicken", "tray", "chicken", "chicken", "cat", "basket"]}, {"id": "VD_video_2_15_1_1", "boxes": [[580, 0, 609, 110], [1529, 0, 1559, 123], [2470, -1, 2503, 115], [3413, 0, 3452, 102]], "scores": [0.2919716238975525, 0.2769654393196106, 0.266452431678772, 0.3296511769294739], "labels": ["spear", "spear", "spear", "spear"]}, {"id": "VS_chart_1_8_1_2", "boxes": [[0, 49, 779, 564], [704, 104, 712, 116], [239, 95, 723, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.3781140446662903, 0.2574912905693054, 0.3500686287879944, 0.24211366474628448, 0.23417788743972778, 0.22674663364887238, 0.22186169028282166, 0.2279987335205078, 0.24450133740901947, 0.20099739730358124, 0.23038563132286072, 0.2195906937122345, 0.2204270213842392, 0.21523097157478333, 0.2281801849603653], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_0_4_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_2_2_2_0", "boxes": [[11, -1, 1806, 1021], [101, 111, 1620, 897], [238, 131, 1557, 788], [795, 148, 1482, 685], [420, 822, 1381, 908]], "scores": [0.3935088813304901, 0.46876060962677, 0.38656502962112427, 0.20458421111106873, 0.5878363251686096], "labels": ["graph", "earth", "continent", "continent", "continent"]}, {"id": "VS_ocr_2_11_2_1", "boxes": [[9, 25, 1193, 341], [2, 25, 1199, 433], [4, -3, 1214, 450], [10, 353, 1133, 434]], "scores": [0.4454263746738434, 0.598825216293335, 0.2737369239330292, 0.2568037509918213], "labels": ["text", "text", "text", "text"]}, {"id": "VD_illusion_1_28_0_1", "boxes": [[54, 71, 476, 176], [387, 258, 434, 359], [95, 256, 437, 364], [97, 305, 425, 312]], "scores": [0.321946918964386, 0.2224312424659729, 0.47958987951278687, 0.2105226367712021], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_2_3_2_1", "boxes": [[510, 47, 564, 177], [434, 47, 565, 183], [8, 10, 598, 597], [15, 248, 376, 595], [314, 243, 580, 519], [21, 240, 570, 592], [289, 420, 568, 595]], "scores": [0.2625885605812073, 0.3688904941082001, 0.4077540338039398, 0.5160791277885437, 0.6407093405723572, 0.3972286880016327, 0.524898111820221], "labels": ["cheese", "cheese", "package", "slice", "slice", "pizza", "slice"]}, {"id": "VD_figure_1_18_0_0", "boxes": [[53, 148, 288, 427], [213, 366, 257, 408], [4, 196, 339, 513], [225, 394, 270, 434], [144, 401, 189, 439], [188, 400, 225, 441], [75, 397, 114, 437], [255, 406, 298, 450], [102, 420, 143, 461], [220, 428, 262, 468], [138, 432, 180, 468], [180, 436, 221, 471]], "scores": [0.8541305065155029, 0.5826289057731628, 0.590019166469574, 0.5937806963920593, 0.5819481611251831, 0.5893908739089966, 0.6064274907112122, 0.5820119976997375, 0.5881686806678772, 0.5795769095420837, 0.5846264958381653, 0.5940710306167603], "labels": ["cock", "egg", "hay", "egg", "egg", "egg", "egg", "egg", "egg", "egg", "egg", "egg"]}, {"id": "VD_video_1_16_0_0", "boxes": [[2280, 5, 2389, 207], [572, 2, 683, 292], [1151, 5, 1269, 283], [669, 21, 1308, 499], [2018, 21, 2718, 500], [1354, 21, 1933, 496], [10, 1, 679, 499], [6, 30, 2666, 492], [7, 3, 1326, 496]], "scores": [0.22479315102100372, 0.4172569811344147, 0.3354993760585785, 0.26142022013664246, 0.2847521901130676, 0.2845022678375244, 0.2639312148094177, 0.20726722478866577, 0.21665634214878082], "labels": ["pillar", "pillar", "pillar", "floor", "floor", "floor", "floor", "photo", "floor"]}, {"id": "VS_chart_2_8_2_0", "boxes": [[0, 50, 779, 564], [704, 104, 712, 116], [238, 95, 722, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.38002026081085205, 0.2563152015209198, 0.34049659967422485, 0.24119625985622406, 0.23377202451229095, 0.22596709430217743, 0.22104237973690033, 0.22747012972831726, 0.24374406039714813, 0.2010045051574707, 0.22945211827754974, 0.21911275386810303, 0.21994072198867798, 0.21483853459358215, 0.22786810994148254], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_1_4_1_0", "boxes": [[739, 261, 757, 286], [739, 332, 757, 358], [739, 403, 756, 428], [739, 474, 756, 499], [739, 544, 757, 571]], "scores": [0.30592554807662964, 0.25839897990226746, 0.29671719670295715, 0.25090569257736206, 0.22661705315113068], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_map_2_2_2_1", "boxes": [[11, -1, 1806, 1021], [101, 111, 1620, 897], [238, 131, 1557, 788], [795, 148, 1482, 685], [420, 822, 1381, 908]], "scores": [0.3935088813304901, 0.46876060962677, 0.38656502962112427, 0.20458421111106873, 0.5878363251686096], "labels": ["graph", "earth", "continent", "continent", "continent"]}, {"id": "VS_ocr_0_12_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_28_0_2", "boxes": [[54, 71, 476, 176], [387, 258, 434, 359], [95, 256, 437, 364], [97, 305, 425, 312]], "scores": [0.321946918964386, 0.2224312424659729, 0.47958987951278687, 0.2105226367712021], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_2_3_2_2", "boxes": [[510, 47, 564, 177], [434, 47, 565, 183], [8, 10, 598, 597], [15, 248, 376, 595], [314, 243, 580, 519], [21, 240, 570, 592], [289, 420, 568, 595]], "scores": [0.2625885605812073, 0.3688904941082001, 0.4077540338039398, 0.5160791277885437, 0.6407093405723572, 0.3972286880016327, 0.524898111820221], "labels": ["cheese", "cheese", "package", "slice", "slice", "pizza", "slice"]}, {"id": "VD_figure_1_18_0_1", "boxes": [[53, 148, 288, 427], [213, 366, 257, 408], [4, 196, 339, 513], [225, 394, 270, 434], [144, 401, 189, 439], [188, 400, 225, 441], [75, 397, 114, 437], [255, 406, 298, 450], [102, 420, 143, 461], [220, 428, 262, 468], [138, 432, 180, 468], [180, 436, 221, 471]], "scores": [0.8541305065155029, 0.5826289057731628, 0.590019166469574, 0.5937806963920593, 0.5819481611251831, 0.5893908739089966, 0.6064274907112122, 0.5820119976997375, 0.5881686806678772, 0.5795769095420837, 0.5846264958381653, 0.5940710306167603], "labels": ["cock", "egg", "hay", "egg", "egg", "egg", "egg", "egg", "egg", "egg", "egg", "egg"]}, {"id": "VD_video_1_16_0_1", "boxes": [[2280, 5, 2389, 207], [572, 2, 683, 292], [1151, 5, 1269, 283], [669, 21, 1308, 499], [2018, 21, 2718, 500], [1354, 21, 1933, 496], [10, 1, 679, 499], [6, 30, 2666, 492], [7, 3, 1326, 496]], "scores": [0.22479315102100372, 0.4172569811344147, 0.3354993760585785, 0.26142022013664246, 0.2847521901130676, 0.2845022678375244, 0.2639312148094177, 0.20726722478866577, 0.21665634214878082], "labels": ["pillar", "pillar", "pillar", "floor", "floor", "floor", "floor", "photo", "floor"]}, {"id": "VS_chart_2_8_2_1", "boxes": [[0, 50, 779, 564], [704, 104, 712, 116], [238, 95, 722, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.38002026081085205, 0.2563152015209198, 0.34049659967422485, 0.24119625985622406, 0.23377202451229095, 0.22596709430217743, 0.22104237973690033, 0.22747012972831726, 0.24374406039714813, 0.2010045051574707, 0.22945211827754974, 0.21911275386810303, 0.21994072198867798, 0.21483853459358215, 0.22786810994148254], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_1_4_1_1", "boxes": [[739, 261, 757, 286], [739, 332, 757, 358], [739, 403, 756, 428], [739, 474, 756, 499], [739, 544, 757, 571]], "scores": [0.30592554807662964, 0.25839897990226746, 0.29671719670295715, 0.25090569257736206, 0.22661705315113068], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_map_0_3_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_0_12_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_28_1_0", "boxes": [[78, 67, 499, 174], [65, 256, 509, 362], [461, 256, 506, 357], [66, 303, 503, 311]], "scores": [0.2958541810512543, 0.4509473741054535, 0.257711797952652, 0.24497218430042267], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_1_4_0_0", "boxes": [[19, 22, 338, 440]], "scores": [0.45798709988594055], "labels": ["pork"]}, {"id": "VD_figure_2_18_1_0", "boxes": [[52, 83, 290, 364], [3, 130, 339, 451], [216, 304, 261, 337], [246, 331, 270, 336], [75, 333, 115, 374], [145, 339, 186, 375], [218, 352, 254, 374], [103, 358, 145, 400], [140, 370, 183, 407], [183, 334, 325, 430], [209, 383, 239, 406]], "scores": [0.8219160437583923, 0.579401969909668, 0.549746572971344, 0.27640876173973083, 0.5675625205039978, 0.5347354412078857, 0.21702252328395844, 0.5287767052650452, 0.5311737656593323, 0.5176106691360474, 0.20178520679473877], "labels": ["cock", "hay", "egg", "egg", "egg", "egg", "potato", "egg", "egg", "potato", "potato"]}, {"id": "VD_video_2_16_1_0", "boxes": [[1382, 21, 1972, 506], [2042, 11, 2715, 506], [681, 25, 1321, 505], [5, 19, 660, 505], [1, 55, 2667, 518]], "scores": [0.25283530354499817, 0.314947247505188, 0.2660904824733734, 0.22917529940605164, 0.24022530019283295], "labels": ["floor", "floor", "floor", "floor", "photo"]}, {"id": "VS_chart_2_8_2_2", "boxes": [[0, 50, 779, 564], [704, 104, 712, 116], [238, 95, 722, 560], [545, 145, 553, 155], [545, 184, 553, 194], [545, 223, 553, 234], [545, 262, 553, 272], [545, 302, 553, 313], [467, 340, 475, 352], [467, 380, 475, 392], [467, 380, 475, 392], [467, 420, 475, 432], [467, 459, 475, 471], [467, 498, 475, 510], [467, 537, 475, 549]], "scores": [0.38002026081085205, 0.2563152015209198, 0.34049659967422485, 0.24119625985622406, 0.23377202451229095, 0.22596709430217743, 0.22104237973690033, 0.22747012972831726, 0.24374406039714813, 0.2010045051574707, 0.22945211827754974, 0.21911275386810303, 0.21994072198867798, 0.21483853459358215, 0.22786810994148254], "labels": ["graph", "number", "graph", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_table_1_4_1_2", "boxes": [[739, 261, 757, 286], [739, 332, 757, 358], [739, 403, 756, 428], [739, 474, 756, 499], [739, 544, 757, 571]], "scores": [0.30592554807662964, 0.25839897990226746, 0.29671719670295715, 0.25090569257736206, 0.22661705315113068], "labels": ["number", "number", "number", "number", "number"]}, {"id": "VS_map_0_3_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_ocr_1_12_1_0", "boxes": [[12, 19, 1180, 217], [4, 13, 1200, 500], [1, -1, 1220, 529], [13, 18, 1189, 477], [17, 303, 1195, 465]], "scores": [0.22556886076927185, 0.6281719207763672, 0.44914910197257996, 0.3407899737358093, 0.217185378074646], "labels": ["text", "text", "text", "text", "text"]}, {"id": "VD_illusion_2_28_1_1", "boxes": [[78, 67, 499, 174], [65, 256, 509, 362], [461, 256, 506, 357], [66, 303, 503, 311]], "scores": [0.2958541810512543, 0.4509473741054535, 0.257711797952652, 0.24497218430042267], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_1_4_0_1", "boxes": [[19, 22, 338, 440]], "scores": [0.45798709988594055], "labels": ["pork"]}, {"id": "VD_figure_2_18_1_1", "boxes": [[52, 83, 290, 364], [3, 130, 339, 451], [216, 304, 261, 337], [246, 331, 270, 336], [75, 333, 115, 374], [145, 339, 186, 375], [218, 352, 254, 374], [103, 358, 145, 400], [140, 370, 183, 407], [183, 334, 325, 430], [209, 383, 239, 406]], "scores": [0.8219160437583923, 0.579401969909668, 0.549746572971344, 0.27640876173973083, 0.5675625205039978, 0.5347354412078857, 0.21702252328395844, 0.5287767052650452, 0.5311737656593323, 0.5176106691360474, 0.20178520679473877], "labels": ["cock", "hay", "egg", "egg", "egg", "egg", "potato", "egg", "egg", "potato", "potato"]}, {"id": "VD_video_2_16_1_1", "boxes": [[1382, 21, 1972, 506], [2042, 11, 2715, 506], [681, 25, 1321, 505], [5, 19, 660, 505], [1, 55, 2667, 518]], "scores": [0.25283530354499817, 0.314947247505188, 0.2660904824733734, 0.22917529940605164, 0.24022530019283295], "labels": ["floor", "floor", "floor", "floor", "photo"]}, {"id": "VS_chart_0_9_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_4_2_0", "boxes": [[647, 276, 665, 302], [1008, 277, 1027, 303], [647, 347, 665, 373], [1008, 348, 1027, 373], [647, 419, 665, 444], [1008, 419, 1027, 444], [647, 490, 666, 516], [964, 490, 983, 516], [964, 561, 983, 587], [1009, 560, 1027, 586]], "scores": [0.2524215877056122, 0.21666252613067627, 0.20885872840881348, 0.2542933523654938, 0.24436937272548676, 0.2444998174905777, 0.20497359335422516, 0.20533600449562073, 0.22275042533874512, 0.22761867940425873], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_3_1_0", "boxes": [[4, 7, 915, 505], [69, 12, 710, 441], [5, 338, 303, 496]], "scores": [0.466207891702652, 0.6231783628463745, 0.25105875730514526], "labels": ["map", "map", "map"]}, {"id": "VS_ocr_1_12_1_1", "boxes": [[12, 19, 1180, 217], [4, 13, 1200, 500], [1, -1, 1220, 529], [13, 18, 1189, 477], [17, 303, 1195, 465]], "scores": [0.22556886076927185, 0.6281719207763672, 0.44914910197257996, 0.3407899737358093, 0.217185378074646], "labels": ["text", "text", "text", "text", "text"]}, {"id": "VD_illusion_2_28_1_2", "boxes": [[78, 67, 499, 174], [65, 256, 509, 362], [461, 256, 506, 357], [66, 303, 503, 311]], "scores": [0.2958541810512543, 0.4509473741054535, 0.257711797952652, 0.24497218430042267], "labels": ["arrow", "arrow", "arrow", "line"]}, {"id": "VD_ocr_2_4_1_0", "boxes": [[4, 46, 269, 396]], "scores": [0.31743934750556946], "labels": ["pork"]}, {"id": "VD_figure_1_19_0_0", "boxes": [[29, 1, 265, 294]], "scores": [0.2935658395290375], "labels": ["cartoon"]}, {"id": "VD_video_1_17_0_0", "boxes": [[6, 11, 3354, 795], [1699, 4, 2257, 784], [71, 47, 544, 789], [894, 30, 1398, 787], [2533, 0, 3142, 781]], "scores": [0.43515416979789734, 0.2937668561935425, 0.2447792887687683, 0.26133936643600464, 0.28534266352653503], "labels": ["cartoon", "cartoon", "cartoon", "cartoon", "cartoon"]}, {"id": "VS_chart_0_9_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_4_2_1", "boxes": [[647, 276, 665, 302], [1008, 277, 1027, 303], [647, 347, 665, 373], [1008, 348, 1027, 373], [647, 419, 665, 444], [1008, 419, 1027, 444], [647, 490, 666, 516], [964, 490, 983, 516], [964, 561, 983, 587], [1009, 560, 1027, 586]], "scores": [0.2524215877056122, 0.21666252613067627, 0.20885872840881348, 0.2542933523654938, 0.24436937272548676, 0.2444998174905777, 0.20497359335422516, 0.20533600449562073, 0.22275042533874512, 0.22761867940425873], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_1_3_1_1", "boxes": [[4, 7, 915, 505], [69, 12, 710, 441], [5, 338, 303, 496]], "scores": [0.466207891702652, 0.6231783628463745, 0.25105875730514526], "labels": ["map", "map", "map"]}, {"id": "VS_ocr_2_12_2_0", "boxes": [[1, 8, 1191, 520], [9, 22, 1170, 488]], "scores": [0.6374272108078003, 0.45803865790367126], "labels": ["text", "text"]}, {"id": "VD_illusion_1_29_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_2_4_1_1", "boxes": [[4, 46, 269, 396]], "scores": [0.31743934750556946], "labels": ["pork"]}, {"id": "VD_figure_1_19_0_1", "boxes": [[29, 1, 265, 294]], "scores": [0.2935658395290375], "labels": ["cartoon"]}, {"id": "VD_video_1_17_0_1", "boxes": [[6, 11, 3354, 795], [1699, 4, 2257, 784], [71, 47, 544, 789], [894, 30, 1398, 787], [2533, 0, 3142, 781]], "scores": [0.43515416979789734, 0.2937668561935425, 0.2447792887687683, 0.26133936643600464, 0.28534266352653503], "labels": ["cartoon", "cartoon", "cartoon", "cartoon", "cartoon"]}, {"id": "VS_chart_0_9_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_4_2_2", "boxes": [[647, 276, 665, 302], [1008, 277, 1027, 303], [647, 347, 665, 373], [1008, 348, 1027, 373], [647, 419, 665, 444], [1008, 419, 1027, 444], [647, 490, 666, 516], [964, 490, 983, 516], [964, 561, 983, 587], [1009, 560, 1027, 586]], "scores": [0.2524215877056122, 0.21666252613067627, 0.20885872840881348, 0.2542933523654938, 0.24436937272548676, 0.2444998174905777, 0.20497359335422516, 0.20533600449562073, 0.22275042533874512, 0.22761867940425873], "labels": ["number", "number", "number", "number", "number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_3_2_0", "boxes": [[4, 7, 915, 505], [69, 12, 711, 440], [5, 338, 303, 496]], "scores": [0.4583536982536316, 0.6288056373596191, 0.24410448968410492], "labels": ["map", "map", "map"]}, {"id": "VS_ocr_2_12_2_1", "boxes": [[1, 8, 1191, 520], [9, 22, 1170, 488]], "scores": [0.6374272108078003, 0.45803865790367126], "labels": ["text", "text"]}, {"id": "VD_illusion_1_29_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_1_5_0_0", "boxes": [[32, 119, 52, 143], [21, 122, 84, 187], [18, 151, 31, 171], [21, 154, 33, 172], [38, 148, 62, 173], [38, 147, 83, 185], [57, 157, 84, 185], [11, 153, 81, 188], [224, 180, 247, 198], [166, 184, 188, 202], [276, 184, 298, 203], [120, 193, 144, 210], [45, 181, 303, 250], [91, 208, 115, 228], [19, 221, 49, 248], [70, 227, 95, 247], [49, 187, 306, 286], [43, 188, 231, 287], [32, 189, 311, 433], [60, 277, 85, 298], [216, 217, 311, 433], [246, 307, 274, 331], [32, 275, 219, 404], [218, 285, 311, 435]], "scores": [0.3901057839393616, 0.41003936529159546, 0.21644249558448792, 0.4067399203777313, 0.3422165513038635, 0.3789636790752411, 0.3761330246925354, 0.21819081902503967, 0.36028560996055603, 0.35761791467666626, 0.3932737708091736, 0.36475396156311035, 0.2252168357372284, 0.3816295266151428, 0.37652796506881714, 0.3692076802253723, 0.21177855134010315, 0.27769550681114197, 0.45163393020629883, 0.4253421127796173, 0.3573338985443115, 0.3964744508266449, 0.4335440695285797, 0.3017662763595581], "labels": ["cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cake", "cake", "cake", "cherry", "cake", "cherry", "cake", "cake"]}, {"id": "VD_figure_2_19_1_0", "boxes": [[1, 2, 314, 395], [0, 226, 97, 311]], "scores": [0.22112111747264862, 0.33398810029029846], "labels": ["cartoon", "cheese"]}, {"id": "VD_video_1_17_0_2", "boxes": [[6, 11, 3354, 795], [1699, 4, 2257, 784], [71, 47, 544, 789], [894, 30, 1398, 787], [2533, 0, 3142, 781]], "scores": [0.43515416979789734, 0.2937668561935425, 0.2447792887687683, 0.26133936643600464, 0.28534266352653503], "labels": ["cartoon", "cartoon", "cartoon", "cartoon", "cartoon"]}, {"id": "VS_chart_0_9_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_table_2_4_3_0", "boxes": [[840, 263, 857, 287], [841, 328, 857, 352], [799, 393, 816, 416], [840, 393, 856, 416], [840, 457, 856, 480], [840, 520, 857, 545]], "scores": [0.295575350522995, 0.25694534182548523, 0.2080932855606079, 0.2887706458568573, 0.2438235878944397, 0.23970897495746613], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_2_3_2_1", "boxes": [[4, 7, 915, 505], [69, 12, 711, 440], [5, 338, 303, 496]], "scores": [0.4583536982536316, 0.6288056373596191, 0.24410448968410492], "labels": ["map", "map", "map"]}, {"id": "VD_illusion_1_29_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_1_5_0_1", "boxes": [[32, 119, 52, 143], [21, 122, 84, 187], [18, 151, 31, 171], [21, 154, 33, 172], [38, 148, 62, 173], [38, 147, 83, 185], [57, 157, 84, 185], [11, 153, 81, 188], [224, 180, 247, 198], [166, 184, 188, 202], [276, 184, 298, 203], [120, 193, 144, 210], [45, 181, 303, 250], [91, 208, 115, 228], [19, 221, 49, 248], [70, 227, 95, 247], [49, 187, 306, 286], [43, 188, 231, 287], [32, 189, 311, 433], [60, 277, 85, 298], [216, 217, 311, 433], [246, 307, 274, 331], [32, 275, 219, 404], [218, 285, 311, 435]], "scores": [0.3901057839393616, 0.41003936529159546, 0.21644249558448792, 0.4067399203777313, 0.3422165513038635, 0.3789636790752411, 0.3761330246925354, 0.21819081902503967, 0.36028560996055603, 0.35761791467666626, 0.3932737708091736, 0.36475396156311035, 0.2252168357372284, 0.3816295266151428, 0.37652796506881714, 0.3692076802253723, 0.21177855134010315, 0.27769550681114197, 0.45163393020629883, 0.4253421127796173, 0.3573338985443115, 0.3964744508266449, 0.4335440695285797, 0.3017662763595581], "labels": ["cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cake", "cake", "cake", "cherry", "cake", "cherry", "cake", "cake"]}, {"id": "VD_figure_2_19_1_1", "boxes": [[1, 2, 314, 395], [0, 226, 97, 311]], "scores": [0.22112111747264862, 0.33398810029029846], "labels": ["cartoon", "cheese"]}, {"id": "VD_video_1_17_0_3", "boxes": [[6, 11, 3354, 795], [1699, 4, 2257, 784], [71, 47, 544, 789], [894, 30, 1398, 787], [2533, 0, 3142, 781]], "scores": [0.43515416979789734, 0.2937668561935425, 0.2447792887687683, 0.26133936643600464, 0.28534266352653503], "labels": ["cartoon", "cartoon", "cartoon", "cartoon", "cartoon"]}, {"id": "VS_chart_1_9_1_0", "boxes": [[497, 19, 509, 49], [0, 1, 725, 456], [0, 103, 722, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.22031170129776, 0.3910457193851471, 0.24736636877059937, 0.2203035205602646, 0.44835877418518066, 0.20446039736270905], "labels": ["number", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_2_4_3_1", "boxes": [[840, 263, 857, 287], [841, 328, 857, 352], [799, 393, 816, 416], [840, 393, 856, 416], [840, 457, 856, 480], [840, 520, 857, 545]], "scores": [0.295575350522995, 0.25694534182548523, 0.2080932855606079, 0.2887706458568573, 0.2438235878944397, 0.23970897495746613], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_4_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_29_1_0", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_2_5_1_0", "boxes": [[53, 127, 73, 151], [39, 125, 103, 194], [41, 125, 88, 182], [48, 128, 104, 194], [58, 157, 83, 183], [58, 156, 105, 194], [40, 162, 53, 181], [77, 166, 105, 195], [64, 191, 330, 262], [189, 194, 212, 212], [249, 189, 272, 208], [302, 193, 325, 213], [142, 203, 167, 220], [113, 219, 137, 239], [38, 232, 70, 259], [91, 238, 116, 259], [32, 232, 79, 277], [66, 195, 327, 301], [52, 203, 339, 449], [61, 199, 250, 301], [80, 289, 107, 311], [50, 209, 252, 421], [244, 230, 341, 451], [271, 320, 299, 345], [249, 318, 339, 378], [52, 285, 243, 420], [242, 310, 341, 451]], "scores": [0.3824000656604767, 0.2657105028629303, 0.21770323812961578, 0.3950997591018677, 0.33448153734207153, 0.31650224328041077, 0.39488840103149414, 0.36146077513694763, 0.22248655557632446, 0.3759481906890869, 0.35631251335144043, 0.4001293480396271, 0.3678191900253296, 0.39411333203315735, 0.37366801500320435, 0.3734111487865448, 0.249280646443367, 0.26775678992271423, 0.42487069964408875, 0.22603574395179749, 0.41231653094291687, 0.20604880154132843, 0.35549893975257874, 0.4003743529319763, 0.2191939353942871, 0.4254228174686432, 0.27156832814216614], "labels": ["cherry", "cherry", "cake", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cake", "cake", "cake", "cake", "cherry", "cake", "cake", "cherry", "cream", "cake", "cake"]}, {"id": "VD_video_1_0_0_0", "boxes": [[3, 8, 237, 247], [256, 8, 489, 246], [7, 6, 497, 248]], "scores": [0.549877941608429, 0.5355732440948486, 0.32893142104148865], "labels": ["pattern", "pattern", "pattern"]}, {"id": "VD_video_2_17_1_0", "boxes": [[0, 9, 3358, 799], [1767, 54, 2229, 794], [890, 35, 1398, 794], [2537, 1, 3143, 786], [14, 5, 580, 795]], "scores": [0.4733721911907196, 0.2729922831058502, 0.26933056116104126, 0.28680482506752014, 0.2850727140903473], "labels": ["cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VS_chart_1_9_1_1", "boxes": [[497, 19, 509, 49], [0, 1, 725, 456], [0, 103, 722, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.22031170129776, 0.3910457193851471, 0.24736636877059937, 0.2203035205602646, 0.44835877418518066, 0.20446039736270905], "labels": ["number", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_2_4_3_2", "boxes": [[840, 263, 857, 287], [841, 328, 857, 352], [799, 393, 816, 416], [840, 393, 856, 416], [840, 457, 856, 480], [840, 520, 857, 545]], "scores": [0.295575350522995, 0.25694534182548523, 0.2080932855606079, 0.2887706458568573, 0.2438235878944397, 0.23970897495746613], "labels": ["number", "number", "number", "number", "number", "number"]}, {"id": "VS_map_0_4_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_29_1_1", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_2_5_1_1", "boxes": [[53, 127, 73, 151], [39, 125, 103, 194], [41, 125, 88, 182], [48, 128, 104, 194], [58, 157, 83, 183], [58, 156, 105, 194], [40, 162, 53, 181], [77, 166, 105, 195], [64, 191, 330, 262], [189, 194, 212, 212], [249, 189, 272, 208], [302, 193, 325, 213], [142, 203, 167, 220], [113, 219, 137, 239], [38, 232, 70, 259], [91, 238, 116, 259], [32, 232, 79, 277], [66, 195, 327, 301], [52, 203, 339, 449], [61, 199, 250, 301], [80, 289, 107, 311], [50, 209, 252, 421], [244, 230, 341, 451], [271, 320, 299, 345], [249, 318, 339, 378], [52, 285, 243, 420], [242, 310, 341, 451]], "scores": [0.3824000656604767, 0.2657105028629303, 0.21770323812961578, 0.3950997591018677, 0.33448153734207153, 0.31650224328041077, 0.39488840103149414, 0.36146077513694763, 0.22248655557632446, 0.3759481906890869, 0.35631251335144043, 0.4001293480396271, 0.3678191900253296, 0.39411333203315735, 0.37366801500320435, 0.3734111487865448, 0.249280646443367, 0.26775678992271423, 0.42487069964408875, 0.22603574395179749, 0.41231653094291687, 0.20604880154132843, 0.35549893975257874, 0.4003743529319763, 0.2191939353942871, 0.4254228174686432, 0.27156832814216614], "labels": ["cherry", "cherry", "cake", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cherry", "cake", "cake", "cake", "cake", "cherry", "cake", "cake", "cherry", "cream", "cake", "cake"]}, {"id": "VD_video_2_0_1_0", "boxes": [[7, 6, 242, 243], [7, 3, 496, 245], [262, 6, 488, 243]], "scores": [0.5619966983795166, 0.31406527757644653, 0.5343501567840576], "labels": ["pattern", "pattern", "pattern"]}, {"id": "VD_video_2_17_1_1", "boxes": [[0, 9, 3358, 799], [1767, 54, 2229, 794], [890, 35, 1398, 794], [2537, 1, 3143, 786], [14, 5, 580, 795]], "scores": [0.4733721911907196, 0.2729922831058502, 0.26933056116104126, 0.28680482506752014, 0.2850727140903473], "labels": ["cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VS_chart_1_9_1_2", "boxes": [[497, 19, 509, 49], [0, 1, 725, 456], [0, 103, 722, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.22031170129776, 0.3910457193851471, 0.24736636877059937, 0.2203035205602646, 0.44835877418518066, 0.20446039736270905], "labels": ["number", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_0_5_0_0", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_4_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_2_29_1_2", "boxes": [], "scores": [], "labels": []}, {"id": "VD_ocr_1_6_0_0", "boxes": [[48, 192, 368, 475], [119, 197, 368, 470], [217, 249, 370, 463]], "scores": [0.5206640958786011, 0.239045187830925, 0.238351508975029], "labels": ["cake", "icing", "cream"]}, {"id": "VD_video_1_1_0_0", "boxes": [[140, 51, 388, 474], [20, 8, 1906, 484]], "scores": [0.26059427857398987, 0.4401450753211975], "labels": ["cartoon character", "cartoon"]}, {"id": "VD_video_2_17_1_2", "boxes": [[0, 9, 3358, 799], [1767, 54, 2229, 794], [890, 35, 1398, 794], [2537, 1, 3143, 786], [14, 5, 580, 795]], "scores": [0.4733721911907196, 0.2729922831058502, 0.26933056116104126, 0.28680482506752014, 0.2850727140903473], "labels": ["cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VS_chart_1_9_1_3", "boxes": [[497, 19, 509, 49], [0, 1, 725, 456], [0, 103, 722, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.22031170129776, 0.3910457193851471, 0.24736636877059937, 0.2203035205602646, 0.44835877418518066, 0.20446039736270905], "labels": ["number", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_0_5_0_1", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_0_4_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VD_illusion_1_30_0_0", "boxes": [[78, 124, 522, 356], [517, 347, 524, 579], [79, 356, 525, 585], [983, 346, 1262, 582], [987, 429, 1254, 504], [93, 124, 1255, 804], [519, 506, 990, 578], [980, 509, 1255, 584], [74, 582, 521, 812]], "scores": [0.21848195791244507, 0.210903137922287, 0.33126649260520935, 0.20631878077983856, 0.22087714076042175, 0.4765531122684479, 0.20398187637329102, 0.25809353590011597, 0.2796716094017029], "labels": ["square", "line", "rectangle", "rectangle", "rectangle", "grid", "line", "rectangle", "rectangle"]}, {"id": "VD_ocr_1_6_0_1", "boxes": [[48, 192, 368, 475], [119, 197, 368, 470], [217, 249, 370, 463]], "scores": [0.5206640958786011, 0.239045187830925, 0.238351508975029], "labels": ["cake", "icing", "cream"]}, {"id": "VD_video_1_1_0_1", "boxes": [[140, 51, 388, 474], [20, 8, 1906, 484]], "scores": [0.26059427857398987, 0.4401450753211975], "labels": ["cartoon character", "cartoon"]}, {"id": "VD_video_2_17_1_3", "boxes": [[0, 9, 3358, 799], [1767, 54, 2229, 794], [890, 35, 1398, 794], [2537, 1, 3143, 786], [14, 5, 580, 795]], "scores": [0.4733721911907196, 0.2729922831058502, 0.26933056116104126, 0.28680482506752014, 0.2850727140903473], "labels": ["cartoon", "cartoon character", "cartoon character", "cartoon character", "cartoon character"]}, {"id": "VS_chart_2_9_2_0", "boxes": [[497, 19, 509, 49], [3, 1, 727, 456], [0, 1, 724, 455], [1, 104, 723, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.21998198330402374, 0.37090548872947693, 0.2869301736354828, 0.24609456956386566, 0.21948817372322083, 0.444021075963974, 0.20504862070083618], "labels": ["number", "graph", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_0_5_0_2", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_4_1_0", "boxes": [[4, 1, 898, 678]], "scores": [0.8334432244300842], "labels": ["map"]}, {"id": "VD_illusion_1_30_0_1", "boxes": [[78, 124, 522, 356], [517, 347, 524, 579], [79, 356, 525, 585], [983, 346, 1262, 582], [987, 429, 1254, 504], [93, 124, 1255, 804], [519, 506, 990, 578], [980, 509, 1255, 584], [74, 582, 521, 812]], "scores": [0.21848195791244507, 0.210903137922287, 0.33126649260520935, 0.20631878077983856, 0.22087714076042175, 0.4765531122684479, 0.20398187637329102, 0.25809353590011597, 0.2796716094017029], "labels": ["square", "line", "rectangle", "rectangle", "rectangle", "grid", "line", "rectangle", "rectangle"]}, {"id": "VD_ocr_2_6_1_0", "boxes": [[26, 193, 144, 286], [60, 209, 389, 501], [61, 217, 303, 502]], "scores": [0.29444819688796997, 0.5127168297767639, 0.2009112685918808], "labels": ["flavor", "cake", "icing"]}, {"id": "VD_video_1_1_0_2", "boxes": [[140, 51, 388, 474], [20, 8, 1906, 484]], "scores": [0.26059427857398987, 0.4401450753211975], "labels": ["cartoon character", "cartoon"]}, {"id": "VD_video_1_18_0_0", "boxes": [[2326, 132, 2613, 495], [1173, 121, 1654, 511], [600, 7, 889, 511], [613, 1, 907, 510], [3209, 220, 3506, 499], [138, 228, 702, 512], [1305, 283, 1656, 510], [2113, 331, 2407, 511], [3095, 387, 3446, 512], [3095, 383, 3462, 511], [0, 444, 116, 513], [206, 390, 691, 511], [1076, 426, 1319, 511], [3081, 477, 3226, 513]], "scores": [0.42086729407310486, 0.2226148098707199, 0.2886866331100464, 0.268052339553833, 0.4260094463825226, 0.2396928071975708, 0.41455385088920593, 0.3497925102710724, 0.2368031144142151, 0.35739317536354065, 0.23461446166038513, 0.3686950206756592, 0.3241584897041321, 0.2004246711730957], "labels": ["uniform", "man", "uniform", "man", "uniform", "man", "uniform", "uniform", "uniform", "baseball uniform", "uniform", "uniform", "uniform", "baseball uniform"]}, {"id": "VS_chart_2_9_2_1", "boxes": [[497, 19, 509, 49], [3, 1, 727, 456], [0, 1, 724, 455], [1, 104, 723, 445], [39, 140, 622, 148], [60, 142, 598, 400], [35, 287, 612, 294]], "scores": [0.21998198330402374, 0.37090548872947693, 0.2869301736354828, 0.24609456956386566, 0.21948817372322083, 0.444021075963974, 0.20504862070083618], "labels": ["number", "graph", "graph", "graph", "line", "graph", "line"]}, {"id": "VS_table_0_5_0_3", "boxes": [], "scores": [], "labels": []}, {"id": "VS_map_1_4_1_1", "boxes": [[4, 1, 898, 678]], "scores": [0.8334432244300842], "labels": ["map"]}] \ No newline at end of file