bert-base-uncased-mean-200 / trainer_state.json
sobamchan's picture
Upload folder using huggingface_hub
a1ddacf verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.045882083046570314,
"eval_steps": 5,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0011470520761642578,
"eval_loss": 2.755366325378418,
"eval_runtime": 14.0088,
"eval_samples_per_second": 469.989,
"eval_steps_per_second": 3.712,
"step": 5
},
{
"epoch": 0.0022941041523285156,
"eval_loss": 2.750617027282715,
"eval_runtime": 14.0654,
"eval_samples_per_second": 468.099,
"eval_steps_per_second": 3.697,
"step": 10
},
{
"epoch": 0.0034411562284927736,
"eval_loss": 2.7424304485321045,
"eval_runtime": 14.1186,
"eval_samples_per_second": 466.336,
"eval_steps_per_second": 3.683,
"step": 15
},
{
"epoch": 0.004588208304657031,
"eval_loss": 2.730889081954956,
"eval_runtime": 14.2451,
"eval_samples_per_second": 462.193,
"eval_steps_per_second": 3.65,
"step": 20
},
{
"epoch": 0.005735260380821289,
"eval_loss": 2.7159576416015625,
"eval_runtime": 14.245,
"eval_samples_per_second": 462.199,
"eval_steps_per_second": 3.65,
"step": 25
},
{
"epoch": 0.006882312456985547,
"eval_loss": 2.6975343227386475,
"eval_runtime": 14.3448,
"eval_samples_per_second": 458.981,
"eval_steps_per_second": 3.625,
"step": 30
},
{
"epoch": 0.008029364533149804,
"eval_loss": 2.675693988800049,
"eval_runtime": 16.965,
"eval_samples_per_second": 388.094,
"eval_steps_per_second": 3.065,
"step": 35
},
{
"epoch": 0.009176416609314062,
"eval_loss": 2.650242328643799,
"eval_runtime": 14.2414,
"eval_samples_per_second": 462.315,
"eval_steps_per_second": 3.651,
"step": 40
},
{
"epoch": 0.01032346868547832,
"eval_loss": 2.6213560104370117,
"eval_runtime": 14.4168,
"eval_samples_per_second": 456.689,
"eval_steps_per_second": 3.607,
"step": 45
},
{
"epoch": 0.011470520761642579,
"eval_loss": 2.5892951488494873,
"eval_runtime": 14.3332,
"eval_samples_per_second": 459.353,
"eval_steps_per_second": 3.628,
"step": 50
},
{
"epoch": 0.012617572837806837,
"eval_loss": 2.5538384914398193,
"eval_runtime": 14.4067,
"eval_samples_per_second": 457.01,
"eval_steps_per_second": 3.609,
"step": 55
},
{
"epoch": 0.013764624913971095,
"eval_loss": 2.51452374458313,
"eval_runtime": 14.4948,
"eval_samples_per_second": 454.233,
"eval_steps_per_second": 3.588,
"step": 60
},
{
"epoch": 0.014911676990135353,
"eval_loss": 2.4726295471191406,
"eval_runtime": 14.3162,
"eval_samples_per_second": 459.898,
"eval_steps_per_second": 3.632,
"step": 65
},
{
"epoch": 0.01605872906629961,
"eval_loss": 2.428196668624878,
"eval_runtime": 14.3204,
"eval_samples_per_second": 459.763,
"eval_steps_per_second": 3.631,
"step": 70
},
{
"epoch": 0.017205781142463867,
"eval_loss": 2.379453659057617,
"eval_runtime": 14.3796,
"eval_samples_per_second": 457.872,
"eval_steps_per_second": 3.616,
"step": 75
},
{
"epoch": 0.018352833218628125,
"eval_loss": 2.327221393585205,
"eval_runtime": 14.3415,
"eval_samples_per_second": 459.089,
"eval_steps_per_second": 3.626,
"step": 80
},
{
"epoch": 0.019499885294792383,
"eval_loss": 2.2712388038635254,
"eval_runtime": 14.462,
"eval_samples_per_second": 455.263,
"eval_steps_per_second": 3.596,
"step": 85
},
{
"epoch": 0.02064693737095664,
"eval_loss": 2.2120068073272705,
"eval_runtime": 14.334,
"eval_samples_per_second": 459.326,
"eval_steps_per_second": 3.628,
"step": 90
},
{
"epoch": 0.0217939894471209,
"eval_loss": 2.1501331329345703,
"eval_runtime": 14.3506,
"eval_samples_per_second": 458.796,
"eval_steps_per_second": 3.624,
"step": 95
},
{
"epoch": 0.022941041523285157,
"grad_norm": 19.895355224609375,
"learning_rate": 7.645259938837921e-07,
"loss": 3.6197,
"step": 100
},
{
"epoch": 0.022941041523285157,
"eval_loss": 2.086596727371216,
"eval_runtime": 16.6465,
"eval_samples_per_second": 395.519,
"eval_steps_per_second": 3.124,
"step": 100
},
{
"epoch": 0.024088093599449415,
"eval_loss": 2.0223236083984375,
"eval_runtime": 14.471,
"eval_samples_per_second": 454.979,
"eval_steps_per_second": 3.593,
"step": 105
},
{
"epoch": 0.025235145675613673,
"eval_loss": 1.9571231603622437,
"eval_runtime": 14.3783,
"eval_samples_per_second": 457.913,
"eval_steps_per_second": 3.617,
"step": 110
},
{
"epoch": 0.02638219775177793,
"eval_loss": 1.890655517578125,
"eval_runtime": 14.288,
"eval_samples_per_second": 460.805,
"eval_steps_per_second": 3.639,
"step": 115
},
{
"epoch": 0.02752924982794219,
"eval_loss": 1.823920488357544,
"eval_runtime": 14.3764,
"eval_samples_per_second": 457.972,
"eval_steps_per_second": 3.617,
"step": 120
},
{
"epoch": 0.028676301904106447,
"eval_loss": 1.758331298828125,
"eval_runtime": 14.2666,
"eval_samples_per_second": 461.499,
"eval_steps_per_second": 3.645,
"step": 125
},
{
"epoch": 0.029823353980270705,
"eval_loss": 1.6937522888183594,
"eval_runtime": 14.2522,
"eval_samples_per_second": 461.963,
"eval_steps_per_second": 3.649,
"step": 130
},
{
"epoch": 0.030970406056434963,
"eval_loss": 1.6316019296646118,
"eval_runtime": 16.3104,
"eval_samples_per_second": 403.67,
"eval_steps_per_second": 3.188,
"step": 135
},
{
"epoch": 0.03211745813259922,
"eval_loss": 1.571895718574524,
"eval_runtime": 14.2422,
"eval_samples_per_second": 462.288,
"eval_steps_per_second": 3.651,
"step": 140
},
{
"epoch": 0.033264510208763476,
"eval_loss": 1.5148202180862427,
"eval_runtime": 14.3624,
"eval_samples_per_second": 458.418,
"eval_steps_per_second": 3.621,
"step": 145
},
{
"epoch": 0.034411562284927734,
"eval_loss": 1.459762454032898,
"eval_runtime": 14.3035,
"eval_samples_per_second": 460.308,
"eval_steps_per_second": 3.635,
"step": 150
},
{
"epoch": 0.03555861436109199,
"eval_loss": 1.4081143140792847,
"eval_runtime": 14.2988,
"eval_samples_per_second": 460.46,
"eval_steps_per_second": 3.637,
"step": 155
},
{
"epoch": 0.03670566643725625,
"eval_loss": 1.3612124919891357,
"eval_runtime": 14.2569,
"eval_samples_per_second": 461.812,
"eval_steps_per_second": 3.647,
"step": 160
},
{
"epoch": 0.03785271851342051,
"eval_loss": 1.318212866783142,
"eval_runtime": 14.282,
"eval_samples_per_second": 460.999,
"eval_steps_per_second": 3.641,
"step": 165
},
{
"epoch": 0.038999770589584766,
"eval_loss": 1.2802687883377075,
"eval_runtime": 14.4058,
"eval_samples_per_second": 457.038,
"eval_steps_per_second": 3.61,
"step": 170
},
{
"epoch": 0.040146822665749024,
"eval_loss": 1.246294617652893,
"eval_runtime": 14.2804,
"eval_samples_per_second": 461.051,
"eval_steps_per_second": 3.641,
"step": 175
},
{
"epoch": 0.04129387474191328,
"eval_loss": 1.2160167694091797,
"eval_runtime": 14.3449,
"eval_samples_per_second": 458.977,
"eval_steps_per_second": 3.625,
"step": 180
},
{
"epoch": 0.04244092681807754,
"eval_loss": 1.189509630203247,
"eval_runtime": 14.2361,
"eval_samples_per_second": 462.486,
"eval_steps_per_second": 3.653,
"step": 185
},
{
"epoch": 0.0435879788942418,
"eval_loss": 1.1653709411621094,
"eval_runtime": 14.3748,
"eval_samples_per_second": 458.025,
"eval_steps_per_second": 3.617,
"step": 190
},
{
"epoch": 0.044735030970406056,
"eval_loss": 1.143513798713684,
"eval_runtime": 14.25,
"eval_samples_per_second": 462.035,
"eval_steps_per_second": 3.649,
"step": 195
},
{
"epoch": 0.045882083046570314,
"grad_norm": 8.192963600158691,
"learning_rate": 1.5290519877675841e-06,
"loss": 2.292,
"step": 200
},
{
"epoch": 0.045882083046570314,
"eval_loss": 1.1239805221557617,
"eval_runtime": 16.3012,
"eval_samples_per_second": 403.896,
"eval_steps_per_second": 3.19,
"step": 200
}
],
"logging_steps": 100,
"max_steps": 13077,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}