yajuniverse commited on
Commit
7cbf279
·
verified ·
1 Parent(s): d5550ee

Create leader_board.json

Browse files
Files changed (1) hide show
  1. leader_board.json +444 -0
leader_board.json ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "Rank": {
3
+ "0": 1,
4
+ "1": 2,
5
+ "2": 3,
6
+ "3": 4,
7
+ "4": 5,
8
+ "5": 6,
9
+ "6": 7,
10
+ "7": 8,
11
+ "8": 9,
12
+ "9": 10,
13
+ "10": 11,
14
+ "11": 12,
15
+ "12": 13,
16
+ "13": 14,
17
+ "14": 15
18
+ },
19
+ "Model Type": {
20
+ "0": "api",
21
+ "1": "api",
22
+ "2": "api",
23
+ "3": "api",
24
+ "4": "api",
25
+ "5": "api",
26
+ "6": "api",
27
+ "7": "api",
28
+ "8": "OpenSource",
29
+ "9": "api",
30
+ "10": "OpenSource",
31
+ "11": "OpenSource",
32
+ "12": "OpenSource",
33
+ "13": "OpenSource",
34
+ "14": "OpenSource"
35
+ },
36
+ "Model Size": {
37
+ "0": "Unknown",
38
+ "1": "Unknown",
39
+ "2": "Unknown",
40
+ "3": "Unknown",
41
+ "4": "Unknown",
42
+ "5": "Unknown",
43
+ "6": "Unknown",
44
+ "7": "Unknown",
45
+ "8": "4B-10B",
46
+ "9": "Unknown",
47
+ "10": "4B-10B",
48
+ "11": "10B-20B",
49
+ "12": "4B-10B",
50
+ "13": "4B-10B",
51
+ "14": "<4B"
52
+ },
53
+ "Model": {
54
+ "0": "Claude3.5-Sonnet",
55
+ "1": "GPT-4o(0513)",
56
+ "2": "GPT-4-turbo(0409)",
57
+ "3": "Claude-3-Sonnet",
58
+ "4": "Claude-3-Opus",
59
+ "5": "GPT-4v(1106)",
60
+ "6": "Claude-3-Haiku",
61
+ "7": "Gemini-Pro-Vision",
62
+ "8": "InternLM-XComposer2-VL-7B",
63
+ "9": "Qwen-VL-Max",
64
+ "10": "LLaVA-v1.6-Mistral-7B-HF",
65
+ "11": "LLaVA-v1.6-Vicuna-13B-HF",
66
+ "12": "IDEFICS2-8B",
67
+ "13": "Fuyu-8B",
68
+ "14": "Paligemma-3B-Mix-224"
69
+ },
70
+ "Params (B)": {
71
+ "0": "",
72
+ "1": "",
73
+ "2": "",
74
+ "3": "",
75
+ "4": "",
76
+ "5": "",
77
+ "6": "",
78
+ "7": "",
79
+ "8": "7B",
80
+ "9": "",
81
+ "10": "7.57B",
82
+ "11": "13.4B",
83
+ "12": "8.4B",
84
+ "13": "9.41B",
85
+ "14": "2.92B"
86
+ },
87
+ "Supports multiple images": {
88
+ "0": true,
89
+ "1": true,
90
+ "2": true,
91
+ "3": true,
92
+ "4": true,
93
+ "5": true,
94
+ "6": true,
95
+ "7": true,
96
+ "8": false,
97
+ "9": true,
98
+ "10": false,
99
+ "11": true,
100
+ "12": true,
101
+ "13": false,
102
+ "14": false
103
+ },
104
+ "Avg Score\u2b06\ufe0f": {
105
+ "0": 70.7,
106
+ "1": 67.06,
107
+ "2": 54.58,
108
+ "3": 43.6,
109
+ "4": 41.99,
110
+ "5": 37.74,
111
+ "6": 36.42,
112
+ "7": 35.96,
113
+ "8": 18.71,
114
+ "9": 12.89,
115
+ "10": 11.13,
116
+ "11": 6.7,
117
+ "12": 2.08,
118
+ "13": 0.0,
119
+ "14": 0.0
120
+ },
121
+ "Avg Score\u2b50": {
122
+ "0": 71.37,
123
+ "1": 68.53,
124
+ "2": 57.97,
125
+ "3": 45.15,
126
+ "4": 42.88,
127
+ "5": 46.44,
128
+ "6": 33.94,
129
+ "7": 36.56,
130
+ "8": 19.7,
131
+ "9": 12.53,
132
+ "10": 11.72,
133
+ "11": 5.35,
134
+ "12": 2.0,
135
+ "13": 0.0,
136
+ "14": 0.0
137
+ },
138
+ "HumanEval-V": {
139
+ "0": 78.33,
140
+ "1": 75.83,
141
+ "2": 76.67,
142
+ "3": 41.67,
143
+ "4": 52.5,
144
+ "5": 78.33,
145
+ "6": 43.33,
146
+ "7": 46.96,
147
+ "8": 15.0,
148
+ "9": 20.83,
149
+ "10": 6.67,
150
+ "11": 8.33,
151
+ "12": 5.0,
152
+ "13": 0.0,
153
+ "14": 0.0
154
+ },
155
+ "HumanEval-V\u2b50": {
156
+ "0": 84.17,
157
+ "1": 85.83,
158
+ "2": 85.83,
159
+ "3": 55.83,
160
+ "4": 61.67,
161
+ "5": 78.33,
162
+ "6": 45.0,
163
+ "7": 49.57,
164
+ "8": 20.83,
165
+ "9": 22.5,
166
+ "10": 9.17,
167
+ "11": 8.33,
168
+ "12": 4.17,
169
+ "13": 0.0,
170
+ "14": 0.0
171
+ },
172
+ "MBPP-V": {
173
+ "0": 60.83,
174
+ "1": 60.0,
175
+ "2": 60.83,
176
+ "3": 48.33,
177
+ "4": 57.5,
178
+ "5": 55.83,
179
+ "6": 49.17,
180
+ "7": 42.74,
181
+ "8": 29.17,
182
+ "9": 18.33,
183
+ "10": 20.83,
184
+ "11": 13.33,
185
+ "12": 5.0,
186
+ "13": 0.0,
187
+ "14": 0.0
188
+ },
189
+ "MBPP-V\u2b50": {
190
+ "0": 60.0,
191
+ "1": 60.83,
192
+ "2": 60.83,
193
+ "3": 47.5,
194
+ "4": 54.17,
195
+ "5": 59.17,
196
+ "6": 45.0,
197
+ "7": 47.86,
198
+ "8": 31.67,
199
+ "9": 20.0,
200
+ "10": 20.83,
201
+ "11": 15.0,
202
+ "12": 4.17,
203
+ "13": 0.0,
204
+ "14": 0.0
205
+ },
206
+ "GSM8K-V": {
207
+ "0": 92.98,
208
+ "1": 93.86,
209
+ "2": 92.98,
210
+ "3": 82.46,
211
+ "4": 55.26,
212
+ "5": 94.74,
213
+ "6": 47.37,
214
+ "7": 67.54,
215
+ "8": 51.75,
216
+ "9": 34.21,
217
+ "10": 4.39,
218
+ "11": 3.51,
219
+ "12": 1.75,
220
+ "13": 0.0,
221
+ "14": 0.0
222
+ },
223
+ "GSM8K-V\u2b50": {
224
+ "0": 92.11,
225
+ "1": 92.98,
226
+ "2": 92.98,
227
+ "3": 79.82,
228
+ "4": 49.12,
229
+ "5": 94.74,
230
+ "6": 55.26,
231
+ "7": 63.16,
232
+ "8": 51.75,
233
+ "9": 39.47,
234
+ "10": 4.39,
235
+ "11": 3.51,
236
+ "12": 1.75,
237
+ "13": 0.0,
238
+ "14": 0.0
239
+ },
240
+ "MATH-V": {
241
+ "0": 53.85,
242
+ "1": 53.85,
243
+ "2": 50.55,
244
+ "3": 29.67,
245
+ "4": 18.68,
246
+ "5": 39.56,
247
+ "6": 10.99,
248
+ "7": 14.29,
249
+ "8": 8.79,
250
+ "9": 4.4,
251
+ "10": 4.4,
252
+ "11": 6.59,
253
+ "12": 1.1,
254
+ "13": 0.0,
255
+ "14": 0.0
256
+ },
257
+ "MATH-V\u2b50": {
258
+ "0": 56.04,
259
+ "1": 56.04,
260
+ "2": 52.75,
261
+ "3": 26.37,
262
+ "4": 18.68,
263
+ "5": 40.66,
264
+ "6": 15.38,
265
+ "7": 13.19,
266
+ "8": 9.89,
267
+ "9": 2.2,
268
+ "10": 4.44,
269
+ "11": 6.59,
270
+ "12": 2.2,
271
+ "13": 0.0,
272
+ "14": 0.0
273
+ },
274
+ "VP": {
275
+ "0": 61.54,
276
+ "1": 74.36,
277
+ "2": 2.56,
278
+ "3": 35.9,
279
+ "4": 46.15,
280
+ "5": 18.8,
281
+ "6": 47.86,
282
+ "7": 29.06,
283
+ "8": 0.85,
284
+ "9": 1.74,
285
+ "10": 17.95,
286
+ "11": 0.0,
287
+ "12": 0.0,
288
+ "13": 0.0,
289
+ "14": 0.0
290
+ },
291
+ "VP\u2b50": {
292
+ "0": 50.43,
293
+ "1": 67.52,
294
+ "2": 18.8,
295
+ "3": 37.61,
296
+ "4": 50.43,
297
+ "5": 49.57,
298
+ "6": 15.38,
299
+ "7": 29.06,
300
+ "8": 0.0,
301
+ "9": 2.65,
302
+ "10": 20.51,
303
+ "11": 0.0,
304
+ "12": 0.0,
305
+ "13": 0.0,
306
+ "14": 0.0
307
+ },
308
+ "Matplotlib": {
309
+ "0": 53.41,
310
+ "1": 46.14,
311
+ "2": 41.28,
312
+ "3": 23.18,
313
+ "4": 19.55,
314
+ "5": 4.77,
315
+ "6": 18.86,
316
+ "7": 29.55,
317
+ "8": 6.59,
318
+ "9": 2.05,
319
+ "10": 3.18,
320
+ "11": 3.41,
321
+ "12": 1.36,
322
+ "13": 0.0,
323
+ "14": 0.0
324
+ },
325
+ "Matplotlib\u2b50": {
326
+ "0": 56.36,
327
+ "1": 54.09,
328
+ "2": 48.85,
329
+ "3": 26.36,
330
+ "4": 21.36,
331
+ "5": 10.0,
332
+ "6": 23.18,
333
+ "7": 29.17,
334
+ "8": 6.82,
335
+ "9": 0.0,
336
+ "10": 2.5,
337
+ "11": 0.0,
338
+ "12": 0.68,
339
+ "13": 0.0,
340
+ "14": 0.0
341
+ },
342
+ "SVG": {
343
+ "0": 89.81,
344
+ "1": 56.84,
345
+ "2": 44.05,
346
+ "3": 33.49,
347
+ "4": 25.71,
348
+ "5": 3.3,
349
+ "6": 32.31,
350
+ "7": 25.71,
351
+ "8": 19.34,
352
+ "9": 9.43,
353
+ "10": 9.43,
354
+ "11": 4.01,
355
+ "12": 3.3,
356
+ "13": 0.0,
357
+ "14": 0.0
358
+ },
359
+ "SVG\u2b50": {
360
+ "0": 87.26,
361
+ "1": 59.43,
362
+ "2": 43.1,
363
+ "3": 32.78,
364
+ "4": 21.93,
365
+ "5": 12.74,
366
+ "6": 31.37,
367
+ "7": 24.76,
368
+ "8": 21.23,
369
+ "9": 7.55,
370
+ "10": 8.25,
371
+ "11": 4.72,
372
+ "12": 3.54,
373
+ "13": 0.0,
374
+ "14": 0.0
375
+ },
376
+ "TikZ": {
377
+ "0": 54.47,
378
+ "1": 54.5,
379
+ "2": 40.75,
380
+ "3": 16.75,
381
+ "4": 19.75,
382
+ "5": 10.5,
383
+ "6": 9.0,
384
+ "7": 9.5,
385
+ "8": 1.0,
386
+ "9": 0.0,
387
+ "10": 0.75,
388
+ "11": 1.0,
389
+ "12": 0.0,
390
+ "13": 0.0,
391
+ "14": 0.0
392
+ },
393
+ "TikZ\u2b50": {
394
+ "0": 65.26,
395
+ "1": 52.5,
396
+ "2": 39.25,
397
+ "3": 20.75,
398
+ "4": 23.97,
399
+ "5": 32.0,
400
+ "6": 12.0,
401
+ "7": 7.0,
402
+ "8": 1.5,
403
+ "9": 0.0,
404
+ "10": 1.75,
405
+ "11": 2.0,
406
+ "12": 0.25,
407
+ "13": 0.0,
408
+ "14": 0.0
409
+ },
410
+ "Webpage": {
411
+ "0": 91.12,
412
+ "1": 88.12,
413
+ "2": 81.56,
414
+ "3": 80.99,
415
+ "4": 82.79,
416
+ "5": 33.81,
417
+ "6": 68.85,
418
+ "7": 58.26,
419
+ "8": 35.86,
420
+ "9": 25.0,
421
+ "10": 32.58,
422
+ "11": 20.08,
423
+ "12": 1.23,
424
+ "13": 0.0,
425
+ "14": 0.0
426
+ },
427
+ "Webpage\u2b50": {
428
+ "0": 90.7,
429
+ "1": 87.5,
430
+ "2": 79.3,
431
+ "3": 79.34,
432
+ "4": 84.63,
433
+ "5": 40.78,
434
+ "6": 62.91,
435
+ "7": 65.32,
436
+ "8": 33.61,
437
+ "9": 18.39,
438
+ "10": 33.61,
439
+ "11": 7.99,
440
+ "12": 1.23,
441
+ "13": 0.0,
442
+ "14": 0.0
443
+ }
444
+ }