Update README.md
Browse files
README.md
CHANGED
@@ -35,123 +35,7 @@ parameters:
|
|
35 |
value: [1, 0.5, 0.7, 0.3, 0]
|
36 |
- value: 0.5
|
37 |
```
|
38 |
-
| Model | ARC |HellaSwag| MMLU |TruthfulQA|Winogrande|GSM8K|
|
39 |
-
|---------------------------------------------------------------|----:|--------:|--------------------------|---------:|---------:|----:|
|
40 |
-
|[Sappho_V0.0.4](https://huggingface.co/Jakolo121/Sappho_V0.0.4)|63.65| 84.1|Error: File does not exist| 52.99| 77.66|55.27|
|
41 |
|
42 |
-
### ARC
|
43 |
-
| Task |Version| Metric | Value | |Stderr|
|
44 |
-
|-------------|------:|--------------------|-------------|---|------|
|
45 |
-
|arc_challenge| 1|acc,none | 0.61| | |
|
46 |
-
| | |acc_stderr,none | 0.01| | |
|
47 |
-
| | |acc_norm,none | 0.64| | |
|
48 |
-
| | |acc_norm_stderr,none| 0.01| | |
|
49 |
-
| | |alias |arc_challenge| | |
|
50 |
-
|
51 |
-
Average: 63.65%
|
52 |
-
|
53 |
-
### HellaSwag
|
54 |
-
| Task |Version| Metric | Value | |Stderr|
|
55 |
-
|---------|------:|--------------------|---------|---|------|
|
56 |
-
|hellaswag| 1|acc,none | 0.66| | |
|
57 |
-
| | |acc_stderr,none | 0| | |
|
58 |
-
| | |acc_norm,none | 0.84| | |
|
59 |
-
| | |acc_norm_stderr,none| 0| | |
|
60 |
-
| | |alias |hellaswag| | |
|
61 |
-
|
62 |
-
Average: 84.1%
|
63 |
-
|
64 |
-
### MMLU
|
65 |
-
|
66 |
-
Average: not available (Error: File does not exist)
|
67 |
-
|
68 |
-
### TruthfulQA
|
69 |
-
| Task |Version| Metric | Value | |Stderr|
|
70 |
-
|--------------|-------|-----------------------|-----------------|---|------|
|
71 |
-
|truthfulqa |N/A |rouge2_max,none | 36.50| | |
|
72 |
-
| | |rouge2_max_stderr,none | 1.02| | |
|
73 |
-
| | |rouge1_max,none | 50.18| | |
|
74 |
-
| | |rouge1_max_stderr,none | 0.88| | |
|
75 |
-
| | |rouge1_acc,none | 0.52| | |
|
76 |
-
| | |rouge1_acc_stderr,none | 0.02| | |
|
77 |
-
| | |bleu_max,none | 25.40| | |
|
78 |
-
| | |bleu_max_stderr,none | 0.81| | |
|
79 |
-
| | |rouge2_acc,none | 0.45| | |
|
80 |
-
| | |rouge2_acc_stderr,none | 0.02| | |
|
81 |
-
| | |rouge2_diff,none | 5.12| | |
|
82 |
-
| | |rouge2_diff_stderr,none| 1.14| | |
|
83 |
-
| | |acc,none | 0.45| | |
|
84 |
-
| | |acc_stderr,none | 0.01| | |
|
85 |
-
| | |bleu_acc,none | 0.52| | |
|
86 |
-
| | |bleu_acc_stderr,none | 0.02| | |
|
87 |
-
| | |rouge1_diff,none | 4.67| | |
|
88 |
-
| | |rouge1_diff_stderr,none| 1.08| | |
|
89 |
-
| | |rougeL_diff,none | 3.92| | |
|
90 |
-
| | |rougeL_diff_stderr,none| 1.09| | |
|
91 |
-
| | |bleu_diff,none | 4| | |
|
92 |
-
| | |bleu_diff_stderr,none | 0.79| | |
|
93 |
-
| | |rougeL_acc,none | 0.50| | |
|
94 |
-
| | |rougeL_acc_stderr,none | 0.02| | |
|
95 |
-
| | |rougeL_max,none | 46.87| | |
|
96 |
-
| | |rougeL_max_stderr,none | 0.91| | |
|
97 |
-
| | |alias |truthfulqa | | |
|
98 |
-
|truthfulqa_gen| 3|bleu_max,none | 25.40| | |
|
99 |
-
| | |bleu_max_stderr,none | 0.81| | |
|
100 |
-
| | |bleu_acc,none | 0.52| | |
|
101 |
-
| | |bleu_acc_stderr,none | 0.02| | |
|
102 |
-
| | |bleu_diff,none | 4| | |
|
103 |
-
| | |bleu_diff_stderr,none | 0.79| | |
|
104 |
-
| | |rouge1_max,none | 50.18| | |
|
105 |
-
| | |rouge1_max_stderr,none | 0.88| | |
|
106 |
-
| | |rouge1_acc,none | 0.52| | |
|
107 |
-
| | |rouge1_acc_stderr,none | 0.02| | |
|
108 |
-
| | |rouge1_diff,none | 4.67| | |
|
109 |
-
| | |rouge1_diff_stderr,none| 1.08| | |
|
110 |
-
| | |rouge2_max,none | 36.50| | |
|
111 |
-
| | |rouge2_max_stderr,none | 1.02| | |
|
112 |
-
| | |rouge2_acc,none | 0.45| | |
|
113 |
-
| | |rouge2_acc_stderr,none | 0.02| | |
|
114 |
-
| | |rouge2_diff,none | 5.12| | |
|
115 |
-
| | |rouge2_diff_stderr,none| 1.14| | |
|
116 |
-
| | |rougeL_max,none | 46.87| | |
|
117 |
-
| | |rougeL_max_stderr,none | 0.91| | |
|
118 |
-
| | |rougeL_acc,none | 0.50| | |
|
119 |
-
| | |rougeL_acc_stderr,none | 0.02| | |
|
120 |
-
| | |rougeL_diff,none | 3.92| | |
|
121 |
-
| | |rougeL_diff_stderr,none| 1.09| | |
|
122 |
-
| | |alias | - truthfulqa_gen| | |
|
123 |
-
|truthfulqa_mc1| 2|acc,none | 0.37| | |
|
124 |
-
| | |acc_stderr,none | 0.02| | |
|
125 |
-
| | |alias | - truthfulqa_mc1| | |
|
126 |
-
|truthfulqa_mc2| 2|acc,none | 0.53| | |
|
127 |
-
| | |acc_stderr,none | 0.02| | |
|
128 |
-
| | |alias | - truthfulqa_mc2| | |
|
129 |
-
|
130 |
-
Average: 52.99%
|
131 |
-
|
132 |
-
### Winogrande
|
133 |
-
| Task |Version| Metric | Value | |Stderr|
|
134 |
-
|----------|------:|---------------|----------|---|------|
|
135 |
-
|winogrande| 1|acc,none | 0.78| | |
|
136 |
-
| | |acc_stderr,none| 0.01| | |
|
137 |
-
| | |alias |winogrande| | |
|
138 |
-
|
139 |
-
Average: 77.66%
|
140 |
-
|
141 |
-
### GSM8K
|
142 |
-
|Task |Version| Metric |Value| |Stderr|
|
143 |
-
|-----|------:|-----------------------------------|-----|---|------|
|
144 |
-
|gsm8k| 3|exact_match,strict-match | 0.55| | |
|
145 |
-
| | |exact_match_stderr,strict-match | 0.01| | |
|
146 |
-
| | |exact_match,flexible-extract | 0.56| | |
|
147 |
-
| | |exact_match_stderr,flexible-extract| 0.01| | |
|
148 |
-
| | |alias |gsm8k| | |
|
149 |
-
|
150 |
-
Average: 55.27%
|
151 |
-
|
152 |
-
Average score: Not available due to errors
|
153 |
-
|
154 |
-
Elapsed time: 06:08:53
|
155 |
## 💻 Usage
|
156 |
|
157 |
```python
|
|
|
35 |
value: [1, 0.5, 0.7, 0.3, 0]
|
36 |
- value: 0.5
|
37 |
```
|
|
|
|
|
|
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
## 💻 Usage
|
40 |
|
41 |
```python
|