DeepDream2045's picture
Training in progress, step 50, checkpoint
efa2537 verified
{
"best_metric": 1.0168567895889282,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.06777363605557438,
"eval_steps": 25,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013554727211114877,
"grad_norm": 36.596012115478516,
"learning_rate": 5e-05,
"loss": 63.4065,
"step": 1
},
{
"epoch": 0.0013554727211114877,
"eval_loss": 3.8362667560577393,
"eval_runtime": 213.6984,
"eval_samples_per_second": 23.257,
"eval_steps_per_second": 2.911,
"step": 1
},
{
"epoch": 0.0027109454422229754,
"grad_norm": 38.99151611328125,
"learning_rate": 0.0001,
"loss": 64.8437,
"step": 2
},
{
"epoch": 0.004066418163334463,
"grad_norm": 41.29121017456055,
"learning_rate": 9.989294616193017e-05,
"loss": 65.1126,
"step": 3
},
{
"epoch": 0.005421890884445951,
"grad_norm": 42.77500534057617,
"learning_rate": 9.957224306869053e-05,
"loss": 60.8543,
"step": 4
},
{
"epoch": 0.006777363605557438,
"grad_norm": 32.83053207397461,
"learning_rate": 9.903926402016153e-05,
"loss": 49.4978,
"step": 5
},
{
"epoch": 0.008132836326668925,
"grad_norm": 38.703086853027344,
"learning_rate": 9.829629131445342e-05,
"loss": 46.339,
"step": 6
},
{
"epoch": 0.009488309047780414,
"grad_norm": 42.6236572265625,
"learning_rate": 9.73465064747553e-05,
"loss": 41.951,
"step": 7
},
{
"epoch": 0.010843781768891902,
"grad_norm": 30.51891326904297,
"learning_rate": 9.619397662556435e-05,
"loss": 35.5313,
"step": 8
},
{
"epoch": 0.012199254490003388,
"grad_norm": 36.923702239990234,
"learning_rate": 9.484363707663442e-05,
"loss": 31.8236,
"step": 9
},
{
"epoch": 0.013554727211114876,
"grad_norm": 21.106178283691406,
"learning_rate": 9.330127018922194e-05,
"loss": 32.1737,
"step": 10
},
{
"epoch": 0.014910199932226365,
"grad_norm": 37.03933334350586,
"learning_rate": 9.157348061512727e-05,
"loss": 27.7951,
"step": 11
},
{
"epoch": 0.01626567265333785,
"grad_norm": 50.268577575683594,
"learning_rate": 8.966766701456177e-05,
"loss": 28.1014,
"step": 12
},
{
"epoch": 0.01762114537444934,
"grad_norm": 61.605918884277344,
"learning_rate": 8.759199037394887e-05,
"loss": 28.6188,
"step": 13
},
{
"epoch": 0.018976618095560827,
"grad_norm": 214.79013061523438,
"learning_rate": 8.535533905932738e-05,
"loss": 25.8743,
"step": 14
},
{
"epoch": 0.020332090816672314,
"grad_norm": 451.5144348144531,
"learning_rate": 8.296729075500344e-05,
"loss": 28.1142,
"step": 15
},
{
"epoch": 0.021687563537783804,
"grad_norm": 292.95025634765625,
"learning_rate": 8.043807145043604e-05,
"loss": 28.5825,
"step": 16
},
{
"epoch": 0.02304303625889529,
"grad_norm": 254.0452880859375,
"learning_rate": 7.777851165098012e-05,
"loss": 28.3858,
"step": 17
},
{
"epoch": 0.024398508980006776,
"grad_norm": 75.8310775756836,
"learning_rate": 7.500000000000001e-05,
"loss": 26.6884,
"step": 18
},
{
"epoch": 0.025753981701118266,
"grad_norm": 60.58232116699219,
"learning_rate": 7.211443451095007e-05,
"loss": 27.6412,
"step": 19
},
{
"epoch": 0.027109454422229753,
"grad_norm": 39.16401290893555,
"learning_rate": 6.91341716182545e-05,
"loss": 24.6649,
"step": 20
},
{
"epoch": 0.02846492714334124,
"grad_norm": 32.388431549072266,
"learning_rate": 6.607197326515808e-05,
"loss": 25.1232,
"step": 21
},
{
"epoch": 0.02982039986445273,
"grad_norm": 22.858781814575195,
"learning_rate": 6.294095225512603e-05,
"loss": 21.7579,
"step": 22
},
{
"epoch": 0.031175872585564215,
"grad_norm": 17.251733779907227,
"learning_rate": 5.9754516100806423e-05,
"loss": 21.4803,
"step": 23
},
{
"epoch": 0.0325313453066757,
"grad_norm": 20.60053825378418,
"learning_rate": 5.6526309611002594e-05,
"loss": 20.8267,
"step": 24
},
{
"epoch": 0.03388681802778719,
"grad_norm": 22.706756591796875,
"learning_rate": 5.327015646150716e-05,
"loss": 22.8192,
"step": 25
},
{
"epoch": 0.03388681802778719,
"eval_loss": 1.2665033340454102,
"eval_runtime": 213.7001,
"eval_samples_per_second": 23.257,
"eval_steps_per_second": 2.911,
"step": 25
},
{
"epoch": 0.03524229074889868,
"grad_norm": 18.225135803222656,
"learning_rate": 5e-05,
"loss": 20.766,
"step": 26
},
{
"epoch": 0.03659776347001017,
"grad_norm": 16.9871768951416,
"learning_rate": 4.6729843538492847e-05,
"loss": 21.6636,
"step": 27
},
{
"epoch": 0.037953236191121655,
"grad_norm": 17.358606338500977,
"learning_rate": 4.347369038899744e-05,
"loss": 19.1701,
"step": 28
},
{
"epoch": 0.03930870891223314,
"grad_norm": 17.15289306640625,
"learning_rate": 4.0245483899193595e-05,
"loss": 18.1974,
"step": 29
},
{
"epoch": 0.04066418163334463,
"grad_norm": 13.593099594116211,
"learning_rate": 3.705904774487396e-05,
"loss": 18.8118,
"step": 30
},
{
"epoch": 0.042019654354456114,
"grad_norm": 15.375582695007324,
"learning_rate": 3.392802673484193e-05,
"loss": 18.5611,
"step": 31
},
{
"epoch": 0.04337512707556761,
"grad_norm": 15.086400985717773,
"learning_rate": 3.086582838174551e-05,
"loss": 18.7409,
"step": 32
},
{
"epoch": 0.044730599796679094,
"grad_norm": 20.995872497558594,
"learning_rate": 2.7885565489049946e-05,
"loss": 18.5098,
"step": 33
},
{
"epoch": 0.04608607251779058,
"grad_norm": 33.625465393066406,
"learning_rate": 2.500000000000001e-05,
"loss": 17.3266,
"step": 34
},
{
"epoch": 0.047441545238902066,
"grad_norm": 16.21245574951172,
"learning_rate": 2.2221488349019903e-05,
"loss": 17.6345,
"step": 35
},
{
"epoch": 0.04879701796001355,
"grad_norm": 16.550430297851562,
"learning_rate": 1.9561928549563968e-05,
"loss": 16.328,
"step": 36
},
{
"epoch": 0.05015249068112504,
"grad_norm": 19.983448028564453,
"learning_rate": 1.703270924499656e-05,
"loss": 18.239,
"step": 37
},
{
"epoch": 0.05150796340223653,
"grad_norm": 20.869291305541992,
"learning_rate": 1.4644660940672627e-05,
"loss": 19.6876,
"step": 38
},
{
"epoch": 0.05286343612334802,
"grad_norm": 18.964210510253906,
"learning_rate": 1.2408009626051137e-05,
"loss": 16.477,
"step": 39
},
{
"epoch": 0.054218908844459505,
"grad_norm": 15.887338638305664,
"learning_rate": 1.0332332985438248e-05,
"loss": 16.8681,
"step": 40
},
{
"epoch": 0.05557438156557099,
"grad_norm": 16.398021697998047,
"learning_rate": 8.426519384872733e-06,
"loss": 15.5168,
"step": 41
},
{
"epoch": 0.05692985428668248,
"grad_norm": 23.358903884887695,
"learning_rate": 6.698729810778065e-06,
"loss": 17.0795,
"step": 42
},
{
"epoch": 0.058285327007793965,
"grad_norm": 27.82067108154297,
"learning_rate": 5.156362923365588e-06,
"loss": 17.604,
"step": 43
},
{
"epoch": 0.05964079972890546,
"grad_norm": 21.790048599243164,
"learning_rate": 3.8060233744356633e-06,
"loss": 17.5052,
"step": 44
},
{
"epoch": 0.060996272450016945,
"grad_norm": 21.848608016967773,
"learning_rate": 2.653493525244721e-06,
"loss": 17.0257,
"step": 45
},
{
"epoch": 0.06235174517112843,
"grad_norm": 15.1517333984375,
"learning_rate": 1.70370868554659e-06,
"loss": 16.9079,
"step": 46
},
{
"epoch": 0.06370721789223992,
"grad_norm": 14.538222312927246,
"learning_rate": 9.607359798384785e-07,
"loss": 15.9922,
"step": 47
},
{
"epoch": 0.0650626906133514,
"grad_norm": 16.66765785217285,
"learning_rate": 4.277569313094809e-07,
"loss": 17.3769,
"step": 48
},
{
"epoch": 0.06641816333446289,
"grad_norm": 18.048357009887695,
"learning_rate": 1.0705383806982606e-07,
"loss": 16.0147,
"step": 49
},
{
"epoch": 0.06777363605557438,
"grad_norm": 19.516748428344727,
"learning_rate": 0.0,
"loss": 18.572,
"step": 50
},
{
"epoch": 0.06777363605557438,
"eval_loss": 1.0168567895889282,
"eval_runtime": 213.7505,
"eval_samples_per_second": 23.251,
"eval_steps_per_second": 2.91,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.259156315439104e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}