{
  "best_metric": 2.966737747192383,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.01552553951249806,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003105107902499612,
      "grad_norm": 29.29452133178711,
      "learning_rate": 1.003e-05,
      "loss": 6.4691,
      "step": 1
    },
    {
      "epoch": 0.0003105107902499612,
      "eval_loss": 3.4762182235717773,
      "eval_runtime": 174.2602,
      "eval_samples_per_second": 7.781,
      "eval_steps_per_second": 1.945,
      "step": 1
    },
    {
      "epoch": 0.0006210215804999224,
      "grad_norm": 33.738006591796875,
      "learning_rate": 2.006e-05,
      "loss": 6.5997,
      "step": 2
    },
    {
      "epoch": 0.0009315323707498836,
      "grad_norm": 21.22829818725586,
      "learning_rate": 3.0089999999999998e-05,
      "loss": 6.521,
      "step": 3
    },
    {
      "epoch": 0.0012420431609998447,
      "grad_norm": 19.39741325378418,
      "learning_rate": 4.012e-05,
      "loss": 6.243,
      "step": 4
    },
    {
      "epoch": 0.0015525539512498058,
      "grad_norm": 18.032846450805664,
      "learning_rate": 5.015e-05,
      "loss": 6.0875,
      "step": 5
    },
    {
      "epoch": 0.0018630647414997672,
      "grad_norm": 4.453341960906982,
      "learning_rate": 6.0179999999999996e-05,
      "loss": 6.0478,
      "step": 6
    },
    {
      "epoch": 0.002173575531749728,
      "grad_norm": 31.29482650756836,
      "learning_rate": 7.021e-05,
      "loss": 6.1593,
      "step": 7
    },
    {
      "epoch": 0.0024840863219996894,
      "grad_norm": 5.259471893310547,
      "learning_rate": 8.024e-05,
      "loss": 6.2802,
      "step": 8
    },
    {
      "epoch": 0.0027945971122496508,
      "grad_norm": 28.345125198364258,
      "learning_rate": 9.027e-05,
      "loss": 6.4323,
      "step": 9
    },
    {
      "epoch": 0.0031051079024996117,
      "grad_norm": 1.65045964717865,
      "learning_rate": 0.0001003,
      "loss": 5.9546,
      "step": 10
    },
    {
      "epoch": 0.003415618692749573,
      "grad_norm": 13.602124214172363,
      "learning_rate": 9.97721052631579e-05,
      "loss": 6.123,
      "step": 11
    },
    {
      "epoch": 0.0037261294829995344,
      "grad_norm": 11.951309204101562,
      "learning_rate": 9.924421052631578e-05,
      "loss": 5.9864,
      "step": 12
    },
    {
      "epoch": 0.004036640273249495,
      "grad_norm": 36.824344635009766,
      "learning_rate": 9.871631578947368e-05,
      "loss": 6.1966,
      "step": 13
    },
    {
      "epoch": 0.004347151063499456,
      "grad_norm": 8.036162376403809,
      "learning_rate": 9.818842105263158e-05,
      "loss": 6.3172,
      "step": 14
    },
    {
      "epoch": 0.004657661853749418,
      "grad_norm": 21.814775466918945,
      "learning_rate": 9.766052631578948e-05,
      "loss": 6.2402,
      "step": 15
    },
    {
      "epoch": 0.004968172643999379,
      "grad_norm": 12.35395336151123,
      "learning_rate": 9.713263157894736e-05,
      "loss": 6.067,
      "step": 16
    },
    {
      "epoch": 0.00527868343424934,
      "grad_norm": 3.0644829273223877,
      "learning_rate": 9.660473684210526e-05,
      "loss": 5.9228,
      "step": 17
    },
    {
      "epoch": 0.0055891942244993015,
      "grad_norm": 7.201811790466309,
      "learning_rate": 9.607684210526316e-05,
      "loss": 5.9131,
      "step": 18
    },
    {
      "epoch": 0.0058997050147492625,
      "grad_norm": 4.769659042358398,
      "learning_rate": 9.554894736842104e-05,
      "loss": 6.0645,
      "step": 19
    },
    {
      "epoch": 0.006210215804999223,
      "grad_norm": 1.7823907136917114,
      "learning_rate": 9.502105263157894e-05,
      "loss": 5.857,
      "step": 20
    },
    {
      "epoch": 0.006520726595249185,
      "grad_norm": 1.8709537982940674,
      "learning_rate": 9.449315789473684e-05,
      "loss": 6.0435,
      "step": 21
    },
    {
      "epoch": 0.006831237385499146,
      "grad_norm": 3.4857585430145264,
      "learning_rate": 9.396526315789474e-05,
      "loss": 5.9443,
      "step": 22
    },
    {
      "epoch": 0.007141748175749107,
      "grad_norm": 16.777128219604492,
      "learning_rate": 9.343736842105264e-05,
      "loss": 6.1941,
      "step": 23
    },
    {
      "epoch": 0.007452258965999069,
      "grad_norm": 4.266728401184082,
      "learning_rate": 9.290947368421052e-05,
      "loss": 6.0035,
      "step": 24
    },
    {
      "epoch": 0.00776276975624903,
      "grad_norm": 7.070564270019531,
      "learning_rate": 9.238157894736842e-05,
      "loss": 6.0351,
      "step": 25
    },
    {
      "epoch": 0.00807328054649899,
      "grad_norm": 31.87989044189453,
      "learning_rate": 9.18536842105263e-05,
      "loss": 6.2323,
      "step": 26
    },
    {
      "epoch": 0.008383791336748951,
      "grad_norm": 2.6480438709259033,
      "learning_rate": 9.132578947368422e-05,
      "loss": 6.0649,
      "step": 27
    },
    {
      "epoch": 0.008694302126998912,
      "grad_norm": 2.978304386138916,
      "learning_rate": 9.07978947368421e-05,
      "loss": 5.9796,
      "step": 28
    },
    {
      "epoch": 0.009004812917248875,
      "grad_norm": 2.9000942707061768,
      "learning_rate": 9.027e-05,
      "loss": 5.9194,
      "step": 29
    },
    {
      "epoch": 0.009315323707498836,
      "grad_norm": 16.504867553710938,
      "learning_rate": 8.97421052631579e-05,
      "loss": 6.226,
      "step": 30
    },
    {
      "epoch": 0.009625834497748797,
      "grad_norm": 2.9846396446228027,
      "learning_rate": 8.921421052631578e-05,
      "loss": 5.9803,
      "step": 31
    },
    {
      "epoch": 0.009936345287998758,
      "grad_norm": 1.9557408094406128,
      "learning_rate": 8.868631578947368e-05,
      "loss": 5.9391,
      "step": 32
    },
    {
      "epoch": 0.010246856078248719,
      "grad_norm": 5.9934821128845215,
      "learning_rate": 8.815842105263157e-05,
      "loss": 5.9945,
      "step": 33
    },
    {
      "epoch": 0.01055736686849868,
      "grad_norm": 3.079267978668213,
      "learning_rate": 8.763052631578948e-05,
      "loss": 5.8665,
      "step": 34
    },
    {
      "epoch": 0.010867877658748642,
      "grad_norm": 2.353879928588867,
      "learning_rate": 8.710263157894737e-05,
      "loss": 5.9495,
      "step": 35
    },
    {
      "epoch": 0.011178388448998603,
      "grad_norm": 2.3978257179260254,
      "learning_rate": 8.657473684210526e-05,
      "loss": 5.9834,
      "step": 36
    },
    {
      "epoch": 0.011488899239248564,
      "grad_norm": 1.9142018556594849,
      "learning_rate": 8.604684210526316e-05,
      "loss": 6.1172,
      "step": 37
    },
    {
      "epoch": 0.011799410029498525,
      "grad_norm": 4.943174839019775,
      "learning_rate": 8.551894736842105e-05,
      "loss": 5.8814,
      "step": 38
    },
    {
      "epoch": 0.012109920819748486,
      "grad_norm": 2.114778518676758,
      "learning_rate": 8.499105263157895e-05,
      "loss": 6.1513,
      "step": 39
    },
    {
      "epoch": 0.012420431609998447,
      "grad_norm": 6.381146430969238,
      "learning_rate": 8.446315789473683e-05,
      "loss": 6.0642,
      "step": 40
    },
    {
      "epoch": 0.01273094240024841,
      "grad_norm": 2.5670583248138428,
      "learning_rate": 8.393526315789474e-05,
      "loss": 5.934,
      "step": 41
    },
    {
      "epoch": 0.01304145319049837,
      "grad_norm": 3.5908706188201904,
      "learning_rate": 8.340736842105263e-05,
      "loss": 5.9664,
      "step": 42
    },
    {
      "epoch": 0.013351963980748331,
      "grad_norm": 4.755733013153076,
      "learning_rate": 8.287947368421053e-05,
      "loss": 5.9252,
      "step": 43
    },
    {
      "epoch": 0.013662474770998292,
      "grad_norm": 3.1427011489868164,
      "learning_rate": 8.235157894736842e-05,
      "loss": 5.8733,
      "step": 44
    },
    {
      "epoch": 0.013972985561248253,
      "grad_norm": 4.566623687744141,
      "learning_rate": 8.182368421052631e-05,
      "loss": 5.9674,
      "step": 45
    },
    {
      "epoch": 0.014283496351498214,
      "grad_norm": 1.8994089365005493,
      "learning_rate": 8.129578947368421e-05,
      "loss": 5.8891,
      "step": 46
    },
    {
      "epoch": 0.014594007141748177,
      "grad_norm": 2.4542181491851807,
      "learning_rate": 8.07678947368421e-05,
      "loss": 6.0727,
      "step": 47
    },
    {
      "epoch": 0.014904517931998137,
      "grad_norm": 3.8890998363494873,
      "learning_rate": 8.024e-05,
      "loss": 5.7988,
      "step": 48
    },
    {
      "epoch": 0.015215028722248098,
      "grad_norm": 6.967671871185303,
      "learning_rate": 7.97121052631579e-05,
      "loss": 5.9372,
      "step": 49
    },
    {
      "epoch": 0.01552553951249806,
      "grad_norm": 3.1538615226745605,
      "learning_rate": 7.918421052631579e-05,
      "loss": 6.0131,
      "step": 50
    },
    {
      "epoch": 0.01552553951249806,
      "eval_loss": 2.966737747192383,
      "eval_runtime": 174.432,
      "eval_samples_per_second": 7.774,
      "eval_steps_per_second": 1.943,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1286095383691264e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}