dzanbek committed on
Commit
1620cfb
·
verified ·
1 Parent(s): 11c4ae8

Training in progress, step 6, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f11339acf79a96935564bc101bf3302f2ea49a5e79a1cb6a2b97970436c4d68a
3
  size 25986148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6667ce0c575c8e876d70533fd3d44fa002d0923d53924c86e6cb19dfdb2e8cc9
3
  size 25986148
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97b21c7f33e6dd1250e43a1885dcf2862e9a21f89ea7c79b66b9571be9c48ccc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4024304f7a0346e1ea2dd1d3e14e932816a39584bd6329811f3108a9687b47f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7adb031dc938779259a07e684ea318126e92e52b40c256b93ef474d2ac57a5b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92b4cf85d7ba7a497f88ff799bf4dec5af7dd95be6e00f78bf46ba5deb56bbf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.007782101167315175,
5
  "eval_steps": 2,
6
- "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -59,6 +59,28 @@
59
  "eval_samples_per_second": 27.983,
60
  "eval_steps_per_second": 14.056,
61
  "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 1,
@@ -78,7 +100,7 @@
78
  "attributes": {}
79
  }
80
  },
81
- "total_flos": 205873754406912.0,
82
  "train_batch_size": 2,
83
  "trial_name": null,
84
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.011673151750972763,
5
  "eval_steps": 2,
6
+ "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
59
  "eval_samples_per_second": 27.983,
60
  "eval_steps_per_second": 14.056,
61
  "step": 4
62
+ },
63
+ {
64
+ "epoch": 0.009727626459143969,
65
+ "grad_norm": NaN,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.0,
68
+ "step": 5
69
+ },
70
+ {
71
+ "epoch": 0.011673151750972763,
72
+ "grad_norm": NaN,
73
+ "learning_rate": 6e-05,
74
+ "loss": 0.0,
75
+ "step": 6
76
+ },
77
+ {
78
+ "epoch": 0.011673151750972763,
79
+ "eval_loss": NaN,
80
+ "eval_runtime": 7.6089,
81
+ "eval_samples_per_second": 28.519,
82
+ "eval_steps_per_second": 14.325,
83
+ "step": 6
84
  }
85
  ],
86
  "logging_steps": 1,
 
100
  "attributes": {}
101
  }
102
  },
103
+ "total_flos": 308810631610368.0,
104
  "train_batch_size": 2,
105
  "trial_name": null,
106
  "trial_params": null