yashcode00 committed
Commit c9f81bd · 1 Parent(s): ca69592

yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor

README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor](https://huggingface.co/yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.2045
- - Accuracy: 0.9484
 
 ## Model description
 
@@ -39,27 +39,22 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
 - train_batch_size: 16
- - eval_batch_size: 8
 - seed: 42
- - gradient_accumulation_steps: 8
- - total_train_batch_size: 128
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
- - num_epochs: 100
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
- | 0.0213 | 10.55 | 1000 | 0.2103 | 0.9460 |
- | 0.0192 | 21.11 | 2000 | 0.1935 | 0.9480 |
- | 0.0196 | 31.66 | 3000 | 0.2777 | 0.9278 |
- | 0.014 | 42.22 | 4000 | 0.1927 | 0.9480 |
- | 0.0141 | 52.77 | 5000 | 0.2184 | 0.9439 |
- | 0.0106 | 63.32 | 6000 | 0.2401 | 0.9348 |
- | 0.0112 | 73.88 | 7000 | 0.2206 | 0.9493 |
- | 0.0085 | 84.43 | 8000 | 0.1907 | 0.9526 |
- | 0.0079 | 94.99 | 9000 | 0.2052 | 0.9484 |
 
 
 ### Framework versions

 
 This model is a fine-tuned version of [yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor](https://huggingface.co/yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor) on an unknown dataset.
 It achieves the following results on the evaluation set:
+ - Loss: 0.1719
+ - Accuracy: 0.9554
 
 ## Model description
 
 The following hyperparameters were used during training:
 - learning_rate: 5e-05
 - train_batch_size: 16
+ - eval_batch_size: 16
 - seed: 42
+ - gradient_accumulation_steps: 16
+ - total_train_batch_size: 256
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+ - num_epochs: 90
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | 0.0103 | 21.11 | 1000 | 0.1802 | 0.9501 |
+ | 0.009 | 42.22 | 2000 | 0.1717 | 0.9497 |
+ | 0.0086 | 63.32 | 3000 | 0.1675 | 0.9546 |
+ | 0.0073 | 84.43 | 4000 | 0.1686 | 0.9538 |
 
 
 ### Framework versions
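The hyperparameter list in the updated README maps directly onto `TrainingArguments` in 🤗 Transformers. The sketch below is a reconstruction for illustration only, not the author's training script: the numeric values come from the model card and from trainer_state.json further down, while `output_dir` and the rest of the Trainer wiring are placeholders.

```python
# Minimal sketch (assumed reconstruction, not the original script): TrainingArguments
# matching the updated hyperparameters above. Adam betas/epsilon are the library defaults.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="wav2vec2-indian-lid",   # placeholder path
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=16,     # 16 x 16 = effective train batch of 256
    num_train_epochs=90,
    lr_scheduler_type="linear",
    seed=42,
    evaluation_strategy="steps",
    eval_steps=1000,                    # cadence taken from trainer_state.json below
    logging_steps=100,
    save_steps=2000,
)
```

Note that the reported total_train_batch_size of 256 is simply train_batch_size × gradient_accumulation_steps (16 × 16) on a single device.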
all_results.json CHANGED
@@ -1,15 +1,15 @@
 {
- "epoch": 99.21,
- "eval_accuracy": 0.9484323263168335,
- "eval_loss": 0.2045244723558426,
- "eval_runtime": 49.9991,
 "eval_samples": 2424,
- "eval_samples_per_second": 48.481,
- "eval_steps_per_second": 6.06,
- "total_flos": 3.653391792237703e+19,
- "train_loss": 0.013827496963612577,
- "train_runtime": 35671.7984,
 "train_samples": 12120,
- "train_samples_per_second": 33.976,
- "train_steps_per_second": 0.264
 }

 {
+ "epoch": 89.29,
+ "eval_accuracy": 0.9554455280303955,
+ "eval_loss": 0.1719195693731308,
+ "eval_runtime": 50.8715,
 "eval_samples": 2424,
+ "eval_samples_per_second": 47.65,
+ "eval_steps_per_second": 2.988,
+ "total_flos": 3.2880550437308154e+19,
+ "train_loss": 0.00991302564845863,
+ "train_runtime": 33902.9866,
 "train_samples": 12120,
+ "train_samples_per_second": 32.174,
+ "train_steps_per_second": 0.125
 }
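As a quick sanity check, the throughput figures in the new all_results.json are roughly self-consistent with the configuration above, assuming the Trainer's usual definitions of these metrics:

```python
# Rough consistency check of the reported throughput (assumed metric definitions).
train_samples = 12120
num_train_epochs = 90
train_runtime = 33902.9866   # seconds
max_steps = 4230             # ~47 optimizer steps per epoch at an effective batch of 256

print(train_samples * num_train_epochs / train_runtime)  # ~32.2  ("train_samples_per_second": 32.174)
print(max_steps / train_runtime)                         # ~0.125 ("train_steps_per_second": 0.125)
```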
eval_results.json CHANGED
@@ -1,9 +1,9 @@
 {
- "epoch": 99.21,
- "eval_accuracy": 0.9484323263168335,
- "eval_loss": 0.2045244723558426,
- "eval_runtime": 49.9991,
 "eval_samples": 2424,
- "eval_samples_per_second": 48.481,
- "eval_steps_per_second": 6.06
 }

 {
+ "epoch": 89.29,
+ "eval_accuracy": 0.9554455280303955,
+ "eval_loss": 0.1719195693731308,
+ "eval_runtime": 50.8715,
 "eval_samples": 2424,
+ "eval_samples_per_second": 47.65,
+ "eval_steps_per_second": 2.988
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:077e7eb0595bb98c67ee4c36df372f7f8d867c46c7d5e528c1c4558712b46bff
 size 1266146037

 version https://git-lfs.github.com/spec/v1
+ oid sha256:7ceaacefa2250a52d0d79eeb85b6a0da21680d9b9b79e1d64c35a9ab6bd911c1
 size 1266146037
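Only the LFS pointer of the checkpoint changes in this commit. For reference, a hedged sketch of how a wav2vec2 audio-classification checkpoint like this one is typically loaded for inference; it assumes the repository's config declares a classification head and 16 kHz input, neither of which is verifiable from the diff itself.

```python
# Hedged sketch: loading the updated checkpoint for inference (assumes an
# audio-classification head and 16 kHz input, per the usual XLSR setup).
import numpy as np
import torch
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

repo = "yashcode00/wav2vec2-large-xlsr-indian-language-classification-featureExtractor"
feature_extractor = AutoFeatureExtractor.from_pretrained(repo)
model = AutoModelForAudioClassification.from_pretrained(repo)

waveform = np.zeros(16000, dtype=np.float32)  # placeholder: 1 s of silence at 16 kHz
inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[int(logits.argmax(-1))])  # predicted language label
```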
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
- "epoch": 99.21,
- "total_flos": 3.653391792237703e+19,
- "train_loss": 0.013827496963612577,
- "train_runtime": 35671.7984,
 "train_samples": 12120,
- "train_samples_per_second": 33.976,
- "train_steps_per_second": 0.264
 }

 {
+ "epoch": 89.29,
+ "total_flos": 3.2880550437308154e+19,
+ "train_loss": 0.00991302564845863,
+ "train_runtime": 33902.9866,
 "train_samples": 12120,
+ "train_samples_per_second": 32.174,
+ "train_steps_per_second": 0.125
 }
trainer_state.json CHANGED
@@ -1,682 +1,325 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 99.2084432717678,
 "eval_steps": 1000,
- "global_step": 9400,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
- {
- "epoch": 1.06,
- "learning_rate": 4.946808510638298e-05,
- "loss": 0.0232,
- "step": 100
- },
 {
 "epoch": 2.11,
- "learning_rate": 4.893617021276596e-05,
- "loss": 0.018,
- "step": 200
- },
- {
- "epoch": 3.17,
- "learning_rate": 4.840425531914894e-05,
- "loss": 0.0271,
- "step": 300
 },
 {
 "epoch": 4.22,
- "learning_rate": 4.787234042553192e-05,
- "loss": 0.0241,
- "step": 400
- },
- {
- "epoch": 5.28,
- "learning_rate": 4.734042553191489e-05,
- "loss": 0.025,
- "step": 500
 },
 {
 "epoch": 6.33,
- "learning_rate": 4.680851063829788e-05,
- "loss": 0.0183,
- "step": 600
- },
- {
- "epoch": 7.39,
- "learning_rate": 4.627659574468085e-05,
- "loss": 0.0216,
- "step": 700
 },
 {
 "epoch": 8.44,
- "learning_rate": 4.575e-05,
- "loss": 0.0239,
- "step": 800
- },
- {
- "epoch": 9.5,
- "learning_rate": 4.521808510638298e-05,
- "loss": 0.025,
- "step": 900
- },
- {
- "epoch": 10.55,
- "learning_rate": 4.468617021276596e-05,
- "loss": 0.0213,
- "step": 1000
 },
 {
 "epoch": 10.55,
- "eval_accuracy": 0.9459571242332458,
- "eval_loss": 0.2103370875120163,
- "eval_runtime": 49.3555,
- "eval_samples_per_second": 49.113,
- "eval_steps_per_second": 6.139,
- "step": 1000
- },
- {
- "epoch": 11.61,
- "learning_rate": 4.415425531914894e-05,
- "loss": 0.0226,
- "step": 1100
 },
 {
 "epoch": 12.66,
- "learning_rate": 4.362234042553192e-05,
- "loss": 0.0163,
- "step": 1200
- },
- {
- "epoch": 13.72,
- "learning_rate": 4.30904255319149e-05,
- "loss": 0.0196,
- "step": 1300
 },
 {
 "epoch": 14.78,
- "learning_rate": 4.2558510638297876e-05,
- "loss": 0.0184,
- "step": 1400
- },
- {
- "epoch": 15.83,
- "learning_rate": 4.2026595744680855e-05,
- "loss": 0.0209,
- "step": 1500
 },
 {
 "epoch": 16.89,
- "learning_rate": 4.1494680851063834e-05,
- "loss": 0.0186,
- "step": 1600
- },
- {
- "epoch": 17.94,
- "learning_rate": 4.096276595744681e-05,
- "loss": 0.0193,
- "step": 1700
 },
 {
 "epoch": 19.0,
- "learning_rate": 4.0430851063829786e-05,
- "loss": 0.0198,
- "step": 1800
- },
- {
- "epoch": 20.05,
- "learning_rate": 3.989893617021277e-05,
- "loss": 0.0206,
- "step": 1900
 },
 {
 "epoch": 21.11,
- "learning_rate": 3.9367021276595744e-05,
- "loss": 0.0192,
- "step": 2000
 },
 {
 "epoch": 21.11,
- "eval_accuracy": 0.948019802570343,
- "eval_loss": 0.19352349638938904,
- "eval_runtime": 49.5323,
- "eval_samples_per_second": 48.938,
- "eval_steps_per_second": 6.117,
- "step": 2000
- },
- {
- "epoch": 22.16,
- "learning_rate": 3.8835106382978724e-05,
- "loss": 0.015,
- "step": 2100
 },
 {
 "epoch": 23.22,
- "learning_rate": 3.83031914893617e-05,
- "loss": 0.0197,
- "step": 2200
- },
- {
- "epoch": 24.27,
- "learning_rate": 3.777127659574468e-05,
- "loss": 0.0159,
- "step": 2300
 },
 {
 "epoch": 25.33,
- "learning_rate": 3.723936170212766e-05,
- "loss": 0.0219,
- "step": 2400
- },
- {
- "epoch": 26.39,
- "learning_rate": 3.670744680851064e-05,
- "loss": 0.0145,
- "step": 2500
 },
 {
 "epoch": 27.44,
- "learning_rate": 3.617553191489362e-05,
- "loss": 0.0153,
- "step": 2600
- },
- {
- "epoch": 28.5,
- "learning_rate": 3.56436170212766e-05,
- "loss": 0.0214,
- "step": 2700
 },
 {
 "epoch": 29.55,
- "learning_rate": 3.511170212765957e-05,
- "loss": 0.0196,
- "step": 2800
- },
- {
- "epoch": 30.61,
- "learning_rate": 3.457978723404256e-05,
- "loss": 0.0147,
- "step": 2900
- },
- {
- "epoch": 31.66,
- "learning_rate": 3.4047872340425536e-05,
- "loss": 0.0196,
- "step": 3000
 },
 {
 "epoch": 31.66,
- "eval_accuracy": 0.9278053045272827,
- "eval_loss": 0.2776908278465271,
- "eval_runtime": 49.5681,
- "eval_samples_per_second": 48.902,
- "eval_steps_per_second": 6.113,
- "step": 3000
- },
- {
- "epoch": 32.72,
- "learning_rate": 3.351595744680851e-05,
- "loss": 0.0196,
- "step": 3100
 },
 {
 "epoch": 33.77,
- "learning_rate": 3.298404255319149e-05,
- "loss": 0.0134,
- "step": 3200
- },
- {
- "epoch": 34.83,
- "learning_rate": 3.2452127659574474e-05,
- "loss": 0.015,
- "step": 3300
 },
 {
 "epoch": 35.88,
- "learning_rate": 3.1920212765957446e-05,
- "loss": 0.0203,
- "step": 3400
- },
- {
- "epoch": 36.94,
- "learning_rate": 3.1388297872340426e-05,
- "loss": 0.0155,
- "step": 3500
 },
 {
 "epoch": 37.99,
- "learning_rate": 3.0861702127659576e-05,
- "loss": 0.0123,
- "step": 3600
- },
- {
- "epoch": 39.05,
- "learning_rate": 3.0329787234042556e-05,
- "loss": 0.0166,
- "step": 3700
 },
 {
 "epoch": 40.11,
- "learning_rate": 2.979787234042553e-05,
- "loss": 0.013,
- "step": 3800
- },
- {
- "epoch": 41.16,
- "learning_rate": 2.926595744680851e-05,
- "loss": 0.0176,
- "step": 3900
 },
 {
 "epoch": 42.22,
- "learning_rate": 2.8734042553191493e-05,
- "loss": 0.014,
- "step": 4000
 },
 {
 "epoch": 42.22,
- "eval_accuracy": 0.948019802570343,
- "eval_loss": 0.19270524382591248,
- "eval_runtime": 49.5193,
- "eval_samples_per_second": 48.951,
- "eval_steps_per_second": 6.119,
- "step": 4000
- },
- {
- "epoch": 43.27,
- "learning_rate": 2.820212765957447e-05,
- "loss": 0.0154,
- "step": 4100
 },
 {
 "epoch": 44.33,
- "learning_rate": 2.7670212765957448e-05,
- "loss": 0.0129,
- "step": 4200
- },
- {
- "epoch": 45.38,
- "learning_rate": 2.713829787234043e-05,
- "loss": 0.0121,
- "step": 4300
 },
 {
 "epoch": 46.44,
- "learning_rate": 2.6606382978723403e-05,
- "loss": 0.0151,
- "step": 4400
- },
- {
- "epoch": 47.49,
- "learning_rate": 2.607978723404255e-05,
- "loss": 0.0169,
- "step": 4500
 },
 {
 "epoch": 48.55,
- "learning_rate": 2.5547872340425533e-05,
- "loss": 0.0084,
- "step": 4600
- },
- {
- "epoch": 49.6,
- "learning_rate": 2.5015957446808512e-05,
- "loss": 0.0147,
- "step": 4700
 },
 {
 "epoch": 50.66,
- "learning_rate": 2.448404255319149e-05,
- "loss": 0.0108,
- "step": 4800
- },
- {
- "epoch": 51.72,
- "learning_rate": 2.395212765957447e-05,
- "loss": 0.0113,
- "step": 4900
- },
- {
- "epoch": 52.77,
- "learning_rate": 2.3420212765957447e-05,
- "loss": 0.0141,
- "step": 5000
 },
 {
 "epoch": 52.77,
- "eval_accuracy": 0.9438943862915039,
- "eval_loss": 0.2183828353881836,
- "eval_runtime": 50.0576,
- "eval_samples_per_second": 48.424,
- "eval_steps_per_second": 6.053,
- "step": 5000
- },
- {
- "epoch": 53.83,
- "learning_rate": 2.288829787234043e-05,
- "loss": 0.011,
- "step": 5100
 },
 {
 "epoch": 54.88,
- "learning_rate": 2.2356382978723405e-05,
- "loss": 0.014,
- "step": 5200
- },
- {
- "epoch": 55.94,
- "learning_rate": 2.1824468085106384e-05,
- "loss": 0.0101,
- "step": 5300
 },
 {
 "epoch": 56.99,
- "learning_rate": 2.1292553191489363e-05,
- "loss": 0.0148,
- "step": 5400
- },
- {
- "epoch": 58.05,
- "learning_rate": 2.0760638297872343e-05,
- "loss": 0.0105,
- "step": 5500
 },
 {
 "epoch": 59.1,
- "learning_rate": 2.0228723404255322e-05,
- "loss": 0.0113,
- "step": 5600
- },
- {
- "epoch": 60.16,
- "learning_rate": 1.9696808510638298e-05,
- "loss": 0.0089,
- "step": 5700
 },
 {
 "epoch": 61.21,
- "learning_rate": 1.9164893617021277e-05,
- "loss": 0.0111,
- "step": 5800
- },
- {
- "epoch": 62.27,
- "learning_rate": 1.8632978723404256e-05,
- "loss": 0.0118,
- "step": 5900
 },
 {
 "epoch": 63.32,
- "learning_rate": 1.8101063829787235e-05,
- "loss": 0.0106,
- "step": 6000
 },
 {
 "epoch": 63.32,
- "eval_accuracy": 0.9348185062408447,
- "eval_loss": 0.24008634686470032,
- "eval_runtime": 49.5139,
- "eval_samples_per_second": 48.956,
- "eval_steps_per_second": 6.119,
- "step": 6000
- },
- {
- "epoch": 64.38,
- "learning_rate": 1.7569148936170214e-05,
- "loss": 0.0104,
- "step": 6100
 },
 {
 "epoch": 65.44,
- "learning_rate": 1.7037234042553194e-05,
- "loss": 0.0099,
- "step": 6200
- },
- {
- "epoch": 66.49,
- "learning_rate": 1.6505319148936173e-05,
- "loss": 0.0105,
- "step": 6300
 },
 {
 "epoch": 67.55,
- "learning_rate": 1.597340425531915e-05,
- "loss": 0.0095,
- "step": 6400
- },
- {
- "epoch": 68.6,
- "learning_rate": 1.5441489361702128e-05,
- "loss": 0.0088,
- "step": 6500
 },
 {
 "epoch": 69.66,
- "learning_rate": 1.4909574468085109e-05,
- "loss": 0.0106,
- "step": 6600
- },
- {
- "epoch": 70.71,
- "learning_rate": 1.4377659574468086e-05,
- "loss": 0.0081,
- "step": 6700
 },
 {
 "epoch": 71.77,
- "learning_rate": 1.3845744680851064e-05,
- "loss": 0.0096,
- "step": 6800
- },
- {
- "epoch": 72.82,
- "learning_rate": 1.3313829787234045e-05,
 "loss": 0.0074,
- "step": 6900
- },
- {
- "epoch": 73.88,
- "learning_rate": 1.2781914893617022e-05,
- "loss": 0.0112,
- "step": 7000
 },
 {
 "epoch": 73.88,
- "eval_accuracy": 0.9492574334144592,
- "eval_loss": 0.22060245275497437,
- "eval_runtime": 49.9232,
- "eval_samples_per_second": 48.555,
- "eval_steps_per_second": 6.069,
- "step": 7000
- },
- {
- "epoch": 74.93,
- "learning_rate": 1.225e-05,
- "loss": 0.0097,
- "step": 7100
 },
 {
 "epoch": 75.99,
- "learning_rate": 1.171808510638298e-05,
- "loss": 0.0115,
- "step": 7200
- },
- {
- "epoch": 77.04,
- "learning_rate": 1.1186170212765958e-05,
- "loss": 0.0103,
- "step": 7300
 },
 {
 "epoch": 78.1,
- "learning_rate": 1.0654255319148937e-05,
- "loss": 0.0091,
- "step": 7400
- },
- {
- "epoch": 79.16,
- "learning_rate": 1.0122340425531915e-05,
- "loss": 0.0085,
- "step": 7500
 },
 {
 "epoch": 80.21,
- "learning_rate": 9.590425531914894e-06,
- "loss": 0.0074,
- "step": 7600
- },
- {
- "epoch": 81.27,
- "learning_rate": 9.058510638297871e-06,
- "loss": 0.0102,
- "step": 7700
 },
 {
 "epoch": 82.32,
- "learning_rate": 8.52659574468085e-06,
 "loss": 0.0072,
- "step": 7800
- },
- {
- "epoch": 83.38,
- "learning_rate": 7.994680851063832e-06,
- "loss": 0.0076,
- "step": 7900
 },
 {
 "epoch": 84.43,
- "learning_rate": 7.462765957446809e-06,
- "loss": 0.0085,
- "step": 8000
 },
 {
 "epoch": 84.43,
- "eval_accuracy": 0.9525577425956726,
- "eval_loss": 0.19072547554969788,
- "eval_runtime": 50.1001,
- "eval_samples_per_second": 48.383,
- "eval_steps_per_second": 6.048,
- "step": 8000
- },
- {
- "epoch": 85.49,
- "learning_rate": 6.930851063829788e-06,
- "loss": 0.0082,
- "step": 8100
 },
 {
 "epoch": 86.54,
- "learning_rate": 6.398936170212766e-06,
- "loss": 0.0068,
- "step": 8200
- },
- {
- "epoch": 87.6,
- "learning_rate": 5.867021276595745e-06,
- "loss": 0.0093,
- "step": 8300
 },
 {
 "epoch": 88.65,
- "learning_rate": 5.335106382978724e-06,
- "loss": 0.0112,
- "step": 8400
- },
- {
- "epoch": 89.71,
- "learning_rate": 4.8031914893617025e-06,
- "loss": 0.0066,
- "step": 8500
- },
- {
- "epoch": 90.77,
- "learning_rate": 4.271276595744681e-06,
- "loss": 0.007,
- "step": 8600
- },
- {
- "epoch": 91.82,
- "learning_rate": 3.7393617021276596e-06,
- "loss": 0.0079,
- "step": 8700
- },
- {
- "epoch": 92.88,
- "learning_rate": 3.2074468085106384e-06,
- "loss": 0.0075,
- "step": 8800
- },
- {
- "epoch": 93.93,
- "learning_rate": 2.6808510638297874e-06,
- "loss": 0.008,
- "step": 8900
- },
- {
- "epoch": 94.99,
- "learning_rate": 2.148936170212766e-06,
- "loss": 0.0079,
- "step": 9000
- },
- {
- "epoch": 94.99,
- "eval_accuracy": 0.9484323263168335,
- "eval_loss": 0.20523911714553833,
- "eval_runtime": 49.881,
- "eval_samples_per_second": 48.596,
- "eval_steps_per_second": 6.074,
- "step": 9000
- },
- {
- "epoch": 96.04,
- "learning_rate": 1.6170212765957448e-06,
- "loss": 0.0061,
- "step": 9100
- },
- {
- "epoch": 97.1,
- "learning_rate": 1.0851063829787236e-06,
- "loss": 0.0088,
- "step": 9200
- },
- {
- "epoch": 98.15,
- "learning_rate": 5.531914893617021e-07,
- "loss": 0.0077,
- "step": 9300
- },
- {
- "epoch": 99.21,
- "learning_rate": 2.1276595744680853e-08,
- "loss": 0.0077,
- "step": 9400
 },
 {
- "epoch": 99.21,
- "step": 9400,
- "total_flos": 3.653391792237703e+19,
- "train_loss": 0.013827496963612577,
- "train_runtime": 35671.7984,
- "train_samples_per_second": 33.976,
- "train_steps_per_second": 0.264
 },
 {
- "epoch": 99.21,
- "eval_accuracy": 0.9484323263168335,
- "eval_loss": 0.2045244723558426,
- "eval_runtime": 49.9991,
- "eval_samples_per_second": 48.481,
- "eval_steps_per_second": 6.06,
- "step": 9400
 }
 ],
 "logging_steps": 100,
- "max_steps": 9400,
- "num_train_epochs": 100,
 "save_steps": 2000,
- "total_flos": 3.653391792237703e+19,
 "trial_name": null,
 "trial_params": null
 }

 {
 "best_metric": null,
 "best_model_checkpoint": null,
+ "epoch": 89.28759894459102,
 "eval_steps": 1000,
+ "global_step": 4230,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
 {
 "epoch": 2.11,
+ "learning_rate": 4.8817966903073283e-05,
+ "loss": 0.0118,
+ "step": 100
 },
 {
 "epoch": 4.22,
+ "learning_rate": 4.763593380614658e-05,
+ "loss": 0.0156,
+ "step": 200
 },
 {
 "epoch": 6.33,
+ "learning_rate": 4.645390070921986e-05,
+ "loss": 0.0122,
+ "step": 300
 },
 {
 "epoch": 8.44,
+ "learning_rate": 4.527186761229315e-05,
+ "loss": 0.0148,
+ "step": 400
 },
 {
 "epoch": 10.55,
+ "learning_rate": 4.4089834515366435e-05,
+ "loss": 0.0114,
+ "step": 500
 },
 {
 "epoch": 12.66,
+ "learning_rate": 4.2907801418439716e-05,
+ "loss": 0.0143,
+ "step": 600
 },
 {
 "epoch": 14.78,
+ "learning_rate": 4.1725768321513004e-05,
+ "loss": 0.0149,
+ "step": 700
 },
 {
 "epoch": 16.89,
+ "learning_rate": 4.0543735224586285e-05,
+ "loss": 0.0139,
+ "step": 800
 },
 {
 "epoch": 19.0,
+ "learning_rate": 3.936170212765958e-05,
+ "loss": 0.0125,
+ "step": 900
 },
 {
 "epoch": 21.11,
+ "learning_rate": 3.817966903073286e-05,
+ "loss": 0.0103,
+ "step": 1000
 },
 {
 "epoch": 21.11,
+ "eval_accuracy": 0.9500824809074402,
+ "eval_loss": 0.18024244904518127,
+ "eval_runtime": 47.0038,
+ "eval_samples_per_second": 51.57,
+ "eval_steps_per_second": 3.234,
+ "step": 1000
 },
 {
 "epoch": 23.22,
+ "learning_rate": 3.699763593380615e-05,
+ "loss": 0.0111,
+ "step": 1100
 },
 {
 "epoch": 25.33,
+ "learning_rate": 3.5815602836879437e-05,
+ "loss": 0.0093,
+ "step": 1200
 },
 {
 "epoch": 27.44,
+ "learning_rate": 3.463356973995272e-05,
+ "loss": 0.0109,
+ "step": 1300
 },
 {
 "epoch": 29.55,
+ "learning_rate": 3.3451536643026005e-05,
+ "loss": 0.0102,
+ "step": 1400
 },
 {
 "epoch": 31.66,
+ "learning_rate": 3.226950354609929e-05,
+ "loss": 0.012,
+ "step": 1500
 },
 {
 "epoch": 33.77,
+ "learning_rate": 3.108747044917258e-05,
+ "loss": 0.0116,
+ "step": 1600
 },
 {
 "epoch": 35.88,
+ "learning_rate": 2.9905437352245862e-05,
+ "loss": 0.0145,
+ "step": 1700
 },
 {
 "epoch": 37.99,
+ "learning_rate": 2.8723404255319154e-05,
+ "loss": 0.011,
+ "step": 1800
 },
 {
 "epoch": 40.11,
+ "learning_rate": 2.7541371158392438e-05,
+ "loss": 0.0108,
+ "step": 1900
 },
 {
 "epoch": 42.22,
+ "learning_rate": 2.6359338061465723e-05,
+ "loss": 0.009,
+ "step": 2000
 },
 {
 "epoch": 42.22,
+ "eval_accuracy": 0.9496699571609497,
+ "eval_loss": 0.1716560274362564,
+ "eval_runtime": 51.12,
+ "eval_samples_per_second": 47.418,
+ "eval_steps_per_second": 2.973,
+ "step": 2000
 },
 {
 "epoch": 44.33,
+ "learning_rate": 2.5177304964539007e-05,
+ "loss": 0.0101,
+ "step": 2100
 },
 {
 "epoch": 46.44,
+ "learning_rate": 2.3995271867612295e-05,
+ "loss": 0.0087,
+ "step": 2200
 },
 {
 "epoch": 48.55,
+ "learning_rate": 2.281323877068558e-05,
+ "loss": 0.0114,
+ "step": 2300
 },
 {
 "epoch": 50.66,
+ "learning_rate": 2.1631205673758867e-05,
+ "loss": 0.0076,
+ "step": 2400
 },
 {
 "epoch": 52.77,
+ "learning_rate": 2.0449172576832152e-05,
+ "loss": 0.0088,
+ "step": 2500
 },
 {
 "epoch": 54.88,
+ "learning_rate": 1.926713947990544e-05,
+ "loss": 0.0084,
+ "step": 2600
 },
 {
 "epoch": 56.99,
+ "learning_rate": 1.8085106382978724e-05,
+ "loss": 0.0095,
+ "step": 2700
 },
 {
 "epoch": 59.1,
+ "learning_rate": 1.690307328605201e-05,
+ "loss": 0.0075,
+ "step": 2800
 },
 {
 "epoch": 61.21,
+ "learning_rate": 1.5721040189125296e-05,
+ "loss": 0.0097,
+ "step": 2900
 },
 {
 "epoch": 63.32,
+ "learning_rate": 1.4539007092198581e-05,
+ "loss": 0.0086,
+ "step": 3000
 },
 {
 "epoch": 63.32,
+ "eval_accuracy": 0.9546204805374146,
+ "eval_loss": 0.16754871606826782,
+ "eval_runtime": 52.1005,
+ "eval_samples_per_second": 46.525,
+ "eval_steps_per_second": 2.917,
+ "step": 3000
 },
 {
 "epoch": 65.44,
+ "learning_rate": 1.3356973995271869e-05,
+ "loss": 0.0079,
+ "step": 3100
 },
 {
 "epoch": 67.55,
+ "learning_rate": 1.2174940898345153e-05,
+ "loss": 0.0076,
+ "step": 3200
 },
 {
 "epoch": 69.66,
+ "learning_rate": 1.0992907801418441e-05,
+ "loss": 0.0072,
+ "step": 3300
 },
 {
 "epoch": 71.77,
+ "learning_rate": 9.810874704491727e-06,
 "loss": 0.0074,
+ "step": 3400
 },
 {
 "epoch": 73.88,
+ "learning_rate": 8.628841607565012e-06,
+ "loss": 0.0076,
+ "step": 3500
 },
 {
 "epoch": 75.99,
+ "learning_rate": 7.446808510638298e-06,
+ "loss": 0.0069,
+ "step": 3600
 },
 {
 "epoch": 78.1,
+ "learning_rate": 6.264775413711583e-06,
+ "loss": 0.0068,
+ "step": 3700
 },
 {
 "epoch": 80.21,
+ "learning_rate": 5.08274231678487e-06,
+ "loss": 0.007,
+ "step": 3800
 },
 {
 "epoch": 82.32,
+ "learning_rate": 3.9007092198581565e-06,
 "loss": 0.0072,
+ "step": 3900
 },
 {
 "epoch": 84.43,
+ "learning_rate": 2.7186761229314422e-06,
+ "loss": 0.0073,
+ "step": 4000
 },
 {
 "epoch": 84.43,
+ "eval_accuracy": 0.9537953734397888,
+ "eval_loss": 0.16863200068473816,
+ "eval_runtime": 51.419,
+ "eval_samples_per_second": 47.142,
+ "eval_steps_per_second": 2.956,
+ "step": 4000
 },
 {
 "epoch": 86.54,
+ "learning_rate": 1.5484633569739953e-06,
+ "loss": 0.0065,
+ "step": 4100
 },
 {
 "epoch": 88.65,
+ "learning_rate": 3.6643026004728135e-07,
+ "loss": 0.0058,
+ "step": 4200
 },
 {
+ "epoch": 89.29,
+ "step": 4230,
+ "total_flos": 3.2880550437308154e+19,
+ "train_loss": 0.00991302564845863,
+ "train_runtime": 33902.9866,
+ "train_samples_per_second": 32.174,
+ "train_steps_per_second": 0.125
 },
 {
+ "epoch": 89.29,
+ "eval_accuracy": 0.9554455280303955,
+ "eval_loss": 0.1719195693731308,
+ "eval_runtime": 50.8715,
+ "eval_samples_per_second": 47.65,
+ "eval_steps_per_second": 2.988,
+ "step": 4230
 }
 ],
 "logging_steps": 100,
+ "max_steps": 4230,
+ "num_train_epochs": 90,
 "save_steps": 2000,
+ "total_flos": 3.2880550437308154e+19,
 "trial_name": null,
 "trial_params": null
 }
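Because trainer_state.json is plain JSON, the slimmed-down log_history above can be inspected directly. A small sketch, assuming the file has been downloaded locally:

```python
# List the evaluation checkpoints recorded in the log_history shown above.
import json

with open("trainer_state.json") as f:   # local copy of the file from this repo
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_accuracy" in e]
for e in evals:
    print(e["step"], round(e["eval_loss"], 4), round(e["eval_accuracy"], 4))

best = max(evals, key=lambda e: e["eval_accuracy"])
print("best:", best["step"], best["eval_accuracy"])  # step 4230, ~0.9554 for this run
```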
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1c70a44bda6db02475f8931fe8cc433c517fc85e0209b10da04cf01b310a6ea2
 size 4155

 version https://git-lfs.github.com/spec/v1
+ oid sha256:afafbb8d751fc33fbf51d1298497b2fbbc858aa6e7af5c8ee9fc1310c74fcc53
 size 4155
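training_args.bin is the Trainer's pickled TrainingArguments object, so only its hash changes in this commit. If you need to inspect it, something along these lines works; note that torch.load unpickles arbitrary objects, so only do this for repositories you trust, and on recent PyTorch you may need to pass weights_only=False.

```python
# Hedged sketch: inspect the pickled TrainingArguments stored in training_args.bin.
import torch

args = torch.load("training_args.bin")  # assumes a local download from this repo
print(args.num_train_epochs, args.gradient_accumulation_steps, args.learning_rate)
```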