Update README.md
Browse files
README.md
CHANGED
@@ -197,9 +197,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
197 |
</td>
|
198 |
<td>86.66
|
199 |
</td>
|
200 |
-
<td>86.
|
201 |
</td>
|
202 |
-
<td>
|
203 |
</td>
|
204 |
</tr>
|
205 |
<tr>
|
@@ -207,9 +207,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
207 |
</td>
|
208 |
<td>85.32
|
209 |
</td>
|
210 |
-
<td>85.
|
211 |
</td>
|
212 |
-
<td>
|
213 |
</td>
|
214 |
</tr>
|
215 |
<tr>
|
@@ -217,9 +217,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
217 |
</td>
|
218 |
<td>60.65
|
219 |
</td>
|
220 |
-
<td>
|
221 |
</td>
|
222 |
-
<td>
|
223 |
</td>
|
224 |
</tr>
|
225 |
<tr>
|
@@ -227,9 +227,9 @@ This version of the lm-evaluation-harness includes versions of MMLU, ARC-Challen
|
|
227 |
</td>
|
228 |
<td><strong>84.50</strong>
|
229 |
</td>
|
230 |
-
<td><strong>84.
|
231 |
</td>
|
232 |
-
<td><strong>99.
|
233 |
</td>
|
234 |
</tr>
|
235 |
</table>
|
|
|
197 |
</td>
|
198 |
<td>86.66
|
199 |
</td>
|
200 |
+
<td>86.65
|
201 |
</td>
|
202 |
+
<td>100.0%
|
203 |
</td>
|
204 |
</tr>
|
205 |
<tr>
|
|
|
207 |
</td>
|
208 |
<td>85.32
|
209 |
</td>
|
210 |
+
<td>85.10
|
211 |
</td>
|
212 |
+
<td>100.1%
|
213 |
</td>
|
214 |
</tr>
|
215 |
<tr>
|
|
|
217 |
</td>
|
218 |
<td>60.65
|
219 |
</td>
|
220 |
+
<td>61.43
|
221 |
</td>
|
222 |
+
<td>101.3%
|
223 |
</td>
|
224 |
</tr>
|
225 |
<tr>
|
|
|
227 |
</td>
|
228 |
<td><strong>84.50</strong>
|
229 |
</td>
|
230 |
+
<td><strong>84.33</strong>
|
231 |
</td>
|
232 |
+
<td><strong>99.9%</strong>
|
233 |
</td>
|
234 |
</tr>
|
235 |
</table>
|