Update README.md
Browse files
README.md
CHANGED
@@ -40,6 +40,25 @@ Step Training Loss Validation Loss<br>
|
|
40 |
275 0.931000 0.960848<br>
|
41 |
300 0.932000 0.958946 <-- picked checkpoint <br>
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
### Model Description
|
44 |
|
45 |
|
|
|
40 |
275 0.931000 0.960848<br>
|
41 |
300 0.932000 0.958946 <-- picked checkpoint <br>
|
42 |
|
43 |
+
### Training Parameters
|
44 |
+
per_device_train_batch_size = 4,<br>
|
45 |
+
gradient_accumulation_steps = 16,<br>
|
46 |
+
num_train_epochs = 3,<br>
|
47 |
+
warmup_steps = 5,<br>
|
48 |
+
learning_rate = 3e-5,<br>
|
49 |
+
logging_steps = 25,<br>
|
50 |
+
optim = "adamw_8bit",<br>
|
51 |
+
weight_decay = 0.01,<br>
|
52 |
+
lr_scheduler_type = "linear",<br>
|
53 |
+
seed = 3407,<br>
|
54 |
+
per_device_eval_batch_size = 2,<br>
|
55 |
+
eval_strategy = "steps",<br>
|
56 |
+
eval_accumulation_steps = 32,<br>
|
57 |
+
eval_steps = 25,<br>
|
58 |
+
eval_delay = 0,<br>
|
59 |
+
save_strategy = "steps",<br>
|
60 |
+
save_steps = 50,<br>
|
61 |
+
|
62 |
### Model Description
|
63 |
|
64 |
|