kvasYA's picture
Training in progress, step 50, checkpoint
88db979 verified
{
"best_metric": 0.41836661100387573,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.07928249343441851,
"eval_steps": 25,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015856498686883703,
"grad_norm": 1.2695943117141724,
"learning_rate": 5e-05,
"loss": 14.701,
"step": 1
},
{
"epoch": 0.0015856498686883703,
"eval_loss": 0.7034574151039124,
"eval_runtime": 32.2742,
"eval_samples_per_second": 1.549,
"eval_steps_per_second": 1.549,
"step": 1
},
{
"epoch": 0.0031712997373767406,
"grad_norm": 1.7672878503799438,
"learning_rate": 0.0001,
"loss": 15.9017,
"step": 2
},
{
"epoch": 0.00475694960606511,
"grad_norm": 1.6396403312683105,
"learning_rate": 9.990365154573717e-05,
"loss": 14.1394,
"step": 3
},
{
"epoch": 0.006342599474753481,
"grad_norm": 2.486941337585449,
"learning_rate": 9.961501876182148e-05,
"loss": 17.648,
"step": 4
},
{
"epoch": 0.007928249343441851,
"grad_norm": 2.6891353130340576,
"learning_rate": 9.913533761814537e-05,
"loss": 17.1247,
"step": 5
},
{
"epoch": 0.00951389921213022,
"grad_norm": 3.120403528213501,
"learning_rate": 9.846666218300807e-05,
"loss": 18.4393,
"step": 6
},
{
"epoch": 0.011099549080818592,
"grad_norm": 3.2562410831451416,
"learning_rate": 9.761185582727977e-05,
"loss": 16.208,
"step": 7
},
{
"epoch": 0.012685198949506963,
"grad_norm": 3.946000099182129,
"learning_rate": 9.657457896300791e-05,
"loss": 18.0389,
"step": 8
},
{
"epoch": 0.014270848818195332,
"grad_norm": 4.844947814941406,
"learning_rate": 9.535927336897098e-05,
"loss": 16.1009,
"step": 9
},
{
"epoch": 0.015856498686883703,
"grad_norm": 5.804704666137695,
"learning_rate": 9.397114317029975e-05,
"loss": 16.2563,
"step": 10
},
{
"epoch": 0.017442148555572072,
"grad_norm": 5.010881423950195,
"learning_rate": 9.241613255361455e-05,
"loss": 14.875,
"step": 11
},
{
"epoch": 0.01902779842426044,
"grad_norm": 4.86008882522583,
"learning_rate": 9.070090031310558e-05,
"loss": 15.9194,
"step": 12
},
{
"epoch": 0.020613448292948814,
"grad_norm": 4.434284687042236,
"learning_rate": 8.883279133655399e-05,
"loss": 12.6462,
"step": 13
},
{
"epoch": 0.022199098161637183,
"grad_norm": 4.004912376403809,
"learning_rate": 8.681980515339464e-05,
"loss": 14.5125,
"step": 14
},
{
"epoch": 0.023784748030325552,
"grad_norm": 4.139750957489014,
"learning_rate": 8.467056167950311e-05,
"loss": 13.1571,
"step": 15
},
{
"epoch": 0.025370397899013925,
"grad_norm": 4.041270732879639,
"learning_rate": 8.239426430539243e-05,
"loss": 13.7225,
"step": 16
},
{
"epoch": 0.026956047767702294,
"grad_norm": 3.7319929599761963,
"learning_rate": 8.000066048588211e-05,
"loss": 13.4277,
"step": 17
},
{
"epoch": 0.028541697636390664,
"grad_norm": 4.708305835723877,
"learning_rate": 7.75e-05,
"loss": 13.6227,
"step": 18
},
{
"epoch": 0.030127347505079036,
"grad_norm": 3.9304122924804688,
"learning_rate": 7.490299105985507e-05,
"loss": 12.0749,
"step": 19
},
{
"epoch": 0.031712997373767406,
"grad_norm": 4.36229133605957,
"learning_rate": 7.222075445642904e-05,
"loss": 12.5221,
"step": 20
},
{
"epoch": 0.03329864724245578,
"grad_norm": 4.089043617248535,
"learning_rate": 6.946477593864228e-05,
"loss": 12.4809,
"step": 21
},
{
"epoch": 0.034884297111144144,
"grad_norm": 4.526259899139404,
"learning_rate": 6.664685702961344e-05,
"loss": 13.1436,
"step": 22
},
{
"epoch": 0.03646994697983252,
"grad_norm": 4.510824680328369,
"learning_rate": 6.377906449072578e-05,
"loss": 13.9883,
"step": 23
},
{
"epoch": 0.03805559684852088,
"grad_norm": 4.993456840515137,
"learning_rate": 6.087367864990233e-05,
"loss": 11.316,
"step": 24
},
{
"epoch": 0.039641246717209255,
"grad_norm": 4.436211585998535,
"learning_rate": 5.794314081535644e-05,
"loss": 13.9108,
"step": 25
},
{
"epoch": 0.039641246717209255,
"eval_loss": 0.45079493522644043,
"eval_runtime": 32.3943,
"eval_samples_per_second": 1.543,
"eval_steps_per_second": 1.543,
"step": 25
},
{
"epoch": 0.04122689658589763,
"grad_norm": 4.445535182952881,
"learning_rate": 5.500000000000001e-05,
"loss": 15.3734,
"step": 26
},
{
"epoch": 0.042812546454585994,
"grad_norm": 4.722578525543213,
"learning_rate": 5.205685918464356e-05,
"loss": 15.0681,
"step": 27
},
{
"epoch": 0.044398196323274366,
"grad_norm": 4.662483215332031,
"learning_rate": 4.912632135009769e-05,
"loss": 12.8593,
"step": 28
},
{
"epoch": 0.04598384619196274,
"grad_norm": 4.774641036987305,
"learning_rate": 4.6220935509274235e-05,
"loss": 14.9759,
"step": 29
},
{
"epoch": 0.047569496060651105,
"grad_norm": 4.80148458480835,
"learning_rate": 4.3353142970386564e-05,
"loss": 14.2648,
"step": 30
},
{
"epoch": 0.04915514592933948,
"grad_norm": 4.728338718414307,
"learning_rate": 4.053522406135775e-05,
"loss": 13.1279,
"step": 31
},
{
"epoch": 0.05074079579802785,
"grad_norm": 4.526378631591797,
"learning_rate": 3.777924554357096e-05,
"loss": 12.6951,
"step": 32
},
{
"epoch": 0.052326445666716216,
"grad_norm": 4.543455600738525,
"learning_rate": 3.509700894014496e-05,
"loss": 12.6935,
"step": 33
},
{
"epoch": 0.05391209553540459,
"grad_norm": 4.797730922698975,
"learning_rate": 3.250000000000001e-05,
"loss": 12.7275,
"step": 34
},
{
"epoch": 0.05549774540409296,
"grad_norm": 5.633406162261963,
"learning_rate": 2.9999339514117912e-05,
"loss": 15.7208,
"step": 35
},
{
"epoch": 0.05708339527278133,
"grad_norm": 5.659888744354248,
"learning_rate": 2.760573569460757e-05,
"loss": 15.1841,
"step": 36
},
{
"epoch": 0.0586690451414697,
"grad_norm": 6.257349967956543,
"learning_rate": 2.53294383204969e-05,
"loss": 18.2712,
"step": 37
},
{
"epoch": 0.06025469501015807,
"grad_norm": 5.911546230316162,
"learning_rate": 2.3180194846605367e-05,
"loss": 15.9163,
"step": 38
},
{
"epoch": 0.06184034487884644,
"grad_norm": 5.786442756652832,
"learning_rate": 2.1167208663446025e-05,
"loss": 15.7632,
"step": 39
},
{
"epoch": 0.06342599474753481,
"grad_norm": 6.7470879554748535,
"learning_rate": 1.9299099686894423e-05,
"loss": 15.5596,
"step": 40
},
{
"epoch": 0.06501164461622318,
"grad_norm": 6.114599227905273,
"learning_rate": 1.758386744638546e-05,
"loss": 16.4764,
"step": 41
},
{
"epoch": 0.06659729448491156,
"grad_norm": 7.806087493896484,
"learning_rate": 1.602885682970026e-05,
"loss": 16.9766,
"step": 42
},
{
"epoch": 0.06818294435359992,
"grad_norm": 7.94345760345459,
"learning_rate": 1.464072663102903e-05,
"loss": 16.4281,
"step": 43
},
{
"epoch": 0.06976859422228829,
"grad_norm": 7.828482151031494,
"learning_rate": 1.3425421036992098e-05,
"loss": 18.5273,
"step": 44
},
{
"epoch": 0.07135424409097667,
"grad_norm": 8.026106834411621,
"learning_rate": 1.2388144172720251e-05,
"loss": 16.7366,
"step": 45
},
{
"epoch": 0.07293989395966503,
"grad_norm": 8.980546951293945,
"learning_rate": 1.1533337816991932e-05,
"loss": 18.6461,
"step": 46
},
{
"epoch": 0.0745255438283534,
"grad_norm": 9.830076217651367,
"learning_rate": 1.0864662381854632e-05,
"loss": 21.7088,
"step": 47
},
{
"epoch": 0.07611119369704177,
"grad_norm": 13.883023262023926,
"learning_rate": 1.0384981238178534e-05,
"loss": 25.3251,
"step": 48
},
{
"epoch": 0.07769684356573014,
"grad_norm": 17.941938400268555,
"learning_rate": 1.0096348454262845e-05,
"loss": 25.1379,
"step": 49
},
{
"epoch": 0.07928249343441851,
"grad_norm": 19.284452438354492,
"learning_rate": 1e-05,
"loss": 27.9763,
"step": 50
},
{
"epoch": 0.07928249343441851,
"eval_loss": 0.41836661100387573,
"eval_runtime": 32.3564,
"eval_samples_per_second": 1.545,
"eval_steps_per_second": 1.545,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 50,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 1,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.5578156589056e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}