{
"best_metric": 2.966737747192383,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.01552553951249806,
"eval_steps": 50,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003105107902499612,
"grad_norm": 29.29452133178711,
"learning_rate": 1.003e-05,
"loss": 6.4691,
"step": 1
},
{
"epoch": 0.0003105107902499612,
"eval_loss": 3.4762182235717773,
"eval_runtime": 174.2602,
"eval_samples_per_second": 7.781,
"eval_steps_per_second": 1.945,
"step": 1
},
{
"epoch": 0.0006210215804999224,
"grad_norm": 33.738006591796875,
"learning_rate": 2.006e-05,
"loss": 6.5997,
"step": 2
},
{
"epoch": 0.0009315323707498836,
"grad_norm": 21.22829818725586,
"learning_rate": 3.0089999999999998e-05,
"loss": 6.521,
"step": 3
},
{
"epoch": 0.0012420431609998447,
"grad_norm": 19.39741325378418,
"learning_rate": 4.012e-05,
"loss": 6.243,
"step": 4
},
{
"epoch": 0.0015525539512498058,
"grad_norm": 18.032846450805664,
"learning_rate": 5.015e-05,
"loss": 6.0875,
"step": 5
},
{
"epoch": 0.0018630647414997672,
"grad_norm": 4.453341960906982,
"learning_rate": 6.0179999999999996e-05,
"loss": 6.0478,
"step": 6
},
{
"epoch": 0.002173575531749728,
"grad_norm": 31.29482650756836,
"learning_rate": 7.021e-05,
"loss": 6.1593,
"step": 7
},
{
"epoch": 0.0024840863219996894,
"grad_norm": 5.259471893310547,
"learning_rate": 8.024e-05,
"loss": 6.2802,
"step": 8
},
{
"epoch": 0.0027945971122496508,
"grad_norm": 28.345125198364258,
"learning_rate": 9.027e-05,
"loss": 6.4323,
"step": 9
},
{
"epoch": 0.0031051079024996117,
"grad_norm": 1.65045964717865,
"learning_rate": 0.0001003,
"loss": 5.9546,
"step": 10
},
{
"epoch": 0.003415618692749573,
"grad_norm": 13.602124214172363,
"learning_rate": 9.97721052631579e-05,
"loss": 6.123,
"step": 11
},
{
"epoch": 0.0037261294829995344,
"grad_norm": 11.951309204101562,
"learning_rate": 9.924421052631578e-05,
"loss": 5.9864,
"step": 12
},
{
"epoch": 0.004036640273249495,
"grad_norm": 36.824344635009766,
"learning_rate": 9.871631578947368e-05,
"loss": 6.1966,
"step": 13
},
{
"epoch": 0.004347151063499456,
"grad_norm": 8.036162376403809,
"learning_rate": 9.818842105263158e-05,
"loss": 6.3172,
"step": 14
},
{
"epoch": 0.004657661853749418,
"grad_norm": 21.814775466918945,
"learning_rate": 9.766052631578948e-05,
"loss": 6.2402,
"step": 15
},
{
"epoch": 0.004968172643999379,
"grad_norm": 12.35395336151123,
"learning_rate": 9.713263157894736e-05,
"loss": 6.067,
"step": 16
},
{
"epoch": 0.00527868343424934,
"grad_norm": 3.0644829273223877,
"learning_rate": 9.660473684210526e-05,
"loss": 5.9228,
"step": 17
},
{
"epoch": 0.0055891942244993015,
"grad_norm": 7.201811790466309,
"learning_rate": 9.607684210526316e-05,
"loss": 5.9131,
"step": 18
},
{
"epoch": 0.0058997050147492625,
"grad_norm": 4.769659042358398,
"learning_rate": 9.554894736842104e-05,
"loss": 6.0645,
"step": 19
},
{
"epoch": 0.006210215804999223,
"grad_norm": 1.7823907136917114,
"learning_rate": 9.502105263157894e-05,
"loss": 5.857,
"step": 20
},
{
"epoch": 0.006520726595249185,
"grad_norm": 1.8709537982940674,
"learning_rate": 9.449315789473684e-05,
"loss": 6.0435,
"step": 21
},
{
"epoch": 0.006831237385499146,
"grad_norm": 3.4857585430145264,
"learning_rate": 9.396526315789474e-05,
"loss": 5.9443,
"step": 22
},
{
"epoch": 0.007141748175749107,
"grad_norm": 16.777128219604492,
"learning_rate": 9.343736842105264e-05,
"loss": 6.1941,
"step": 23
},
{
"epoch": 0.007452258965999069,
"grad_norm": 4.266728401184082,
"learning_rate": 9.290947368421052e-05,
"loss": 6.0035,
"step": 24
},
{
"epoch": 0.00776276975624903,
"grad_norm": 7.070564270019531,
"learning_rate": 9.238157894736842e-05,
"loss": 6.0351,
"step": 25
},
{
"epoch": 0.00807328054649899,
"grad_norm": 31.87989044189453,
"learning_rate": 9.18536842105263e-05,
"loss": 6.2323,
"step": 26
},
{
"epoch": 0.008383791336748951,
"grad_norm": 2.6480438709259033,
"learning_rate": 9.132578947368422e-05,
"loss": 6.0649,
"step": 27
},
{
"epoch": 0.008694302126998912,
"grad_norm": 2.978304386138916,
"learning_rate": 9.07978947368421e-05,
"loss": 5.9796,
"step": 28
},
{
"epoch": 0.009004812917248875,
"grad_norm": 2.9000942707061768,
"learning_rate": 9.027e-05,
"loss": 5.9194,
"step": 29
},
{
"epoch": 0.009315323707498836,
"grad_norm": 16.504867553710938,
"learning_rate": 8.97421052631579e-05,
"loss": 6.226,
"step": 30
},
{
"epoch": 0.009625834497748797,
"grad_norm": 2.9846396446228027,
"learning_rate": 8.921421052631578e-05,
"loss": 5.9803,
"step": 31
},
{
"epoch": 0.009936345287998758,
"grad_norm": 1.9557408094406128,
"learning_rate": 8.868631578947368e-05,
"loss": 5.9391,
"step": 32
},
{
"epoch": 0.010246856078248719,
"grad_norm": 5.9934821128845215,
"learning_rate": 8.815842105263157e-05,
"loss": 5.9945,
"step": 33
},
{
"epoch": 0.01055736686849868,
"grad_norm": 3.079267978668213,
"learning_rate": 8.763052631578948e-05,
"loss": 5.8665,
"step": 34
},
{
"epoch": 0.010867877658748642,
"grad_norm": 2.353879928588867,
"learning_rate": 8.710263157894737e-05,
"loss": 5.9495,
"step": 35
},
{
"epoch": 0.011178388448998603,
"grad_norm": 2.3978257179260254,
"learning_rate": 8.657473684210526e-05,
"loss": 5.9834,
"step": 36
},
{
"epoch": 0.011488899239248564,
"grad_norm": 1.9142018556594849,
"learning_rate": 8.604684210526316e-05,
"loss": 6.1172,
"step": 37
},
{
"epoch": 0.011799410029498525,
"grad_norm": 4.943174839019775,
"learning_rate": 8.551894736842105e-05,
"loss": 5.8814,
"step": 38
},
{
"epoch": 0.012109920819748486,
"grad_norm": 2.114778518676758,
"learning_rate": 8.499105263157895e-05,
"loss": 6.1513,
"step": 39
},
{
"epoch": 0.012420431609998447,
"grad_norm": 6.381146430969238,
"learning_rate": 8.446315789473683e-05,
"loss": 6.0642,
"step": 40
},
{
"epoch": 0.01273094240024841,
"grad_norm": 2.5670583248138428,
"learning_rate": 8.393526315789474e-05,
"loss": 5.934,
"step": 41
},
{
"epoch": 0.01304145319049837,
"grad_norm": 3.5908706188201904,
"learning_rate": 8.340736842105263e-05,
"loss": 5.9664,
"step": 42
},
{
"epoch": 0.013351963980748331,
"grad_norm": 4.755733013153076,
"learning_rate": 8.287947368421053e-05,
"loss": 5.9252,
"step": 43
},
{
"epoch": 0.013662474770998292,
"grad_norm": 3.1427011489868164,
"learning_rate": 8.235157894736842e-05,
"loss": 5.8733,
"step": 44
},
{
"epoch": 0.013972985561248253,
"grad_norm": 4.566623687744141,
"learning_rate": 8.182368421052631e-05,
"loss": 5.9674,
"step": 45
},
{
"epoch": 0.014283496351498214,
"grad_norm": 1.8994089365005493,
"learning_rate": 8.129578947368421e-05,
"loss": 5.8891,
"step": 46
},
{
"epoch": 0.014594007141748177,
"grad_norm": 2.4542181491851807,
"learning_rate": 8.07678947368421e-05,
"loss": 6.0727,
"step": 47
},
{
"epoch": 0.014904517931998137,
"grad_norm": 3.8890998363494873,
"learning_rate": 8.024e-05,
"loss": 5.7988,
"step": 48
},
{
"epoch": 0.015215028722248098,
"grad_norm": 6.967671871185303,
"learning_rate": 7.97121052631579e-05,
"loss": 5.9372,
"step": 49
},
{
"epoch": 0.01552553951249806,
"grad_norm": 3.1538615226745605,
"learning_rate": 7.918421052631579e-05,
"loss": 6.0131,
"step": 50
},
{
"epoch": 0.01552553951249806,
"eval_loss": 2.966737747192383,
"eval_runtime": 174.432,
"eval_samples_per_second": 7.774,
"eval_steps_per_second": 1.943,
"step": 50
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.1286095383691264e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}