mrferr3t's picture
Training in progress, step 1000, checkpoint
4fae396 verified
raw
history blame
8.32 kB
{
"best_metric": 0.5773412585258484,
"best_model_checkpoint": "miner_id_24/checkpoint-880",
"epoch": 1.0362694300518134,
"eval_steps": 40,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0010362694300518134,
"eval_loss": 0.742955207824707,
"eval_runtime": 20.981,
"eval_samples_per_second": 9.723,
"eval_steps_per_second": 0.334,
"step": 1
},
{
"epoch": 0.04145077720207254,
"eval_loss": 0.6771278977394104,
"eval_runtime": 21.0227,
"eval_samples_per_second": 9.704,
"eval_steps_per_second": 0.333,
"step": 40
},
{
"epoch": 0.08290155440414508,
"eval_loss": 0.6422113180160522,
"eval_runtime": 20.9905,
"eval_samples_per_second": 9.719,
"eval_steps_per_second": 0.333,
"step": 80
},
{
"epoch": 0.10362694300518134,
"grad_norm": 0.38452568650245667,
"learning_rate": 0.00019999999999999998,
"loss": 1.3417,
"step": 100
},
{
"epoch": 0.12435233160621761,
"eval_loss": 0.6342259645462036,
"eval_runtime": 21.0605,
"eval_samples_per_second": 9.686,
"eval_steps_per_second": 0.332,
"step": 120
},
{
"epoch": 0.16580310880829016,
"eval_loss": 0.6262779831886292,
"eval_runtime": 21.1055,
"eval_samples_per_second": 9.666,
"eval_steps_per_second": 0.332,
"step": 160
},
{
"epoch": 0.20725388601036268,
"grad_norm": 0.41497528553009033,
"learning_rate": 0.0002999992001464462,
"loss": 1.2346,
"step": 200
},
{
"epoch": 0.20725388601036268,
"eval_loss": 0.6233254075050354,
"eval_runtime": 21.0436,
"eval_samples_per_second": 9.694,
"eval_steps_per_second": 0.333,
"step": 200
},
{
"epoch": 0.24870466321243523,
"eval_loss": 0.619152307510376,
"eval_runtime": 21.0068,
"eval_samples_per_second": 9.711,
"eval_steps_per_second": 0.333,
"step": 240
},
{
"epoch": 0.29015544041450775,
"eval_loss": 0.6167010068893433,
"eval_runtime": 21.0018,
"eval_samples_per_second": 9.713,
"eval_steps_per_second": 0.333,
"step": 280
},
{
"epoch": 0.31088082901554404,
"grad_norm": 0.6404178738594055,
"learning_rate": 0.0002999928013691971,
"loss": 1.2428,
"step": 300
},
{
"epoch": 0.3316062176165803,
"eval_loss": 0.6130599975585938,
"eval_runtime": 21.0415,
"eval_samples_per_second": 9.695,
"eval_steps_per_second": 0.333,
"step": 320
},
{
"epoch": 0.37305699481865284,
"eval_loss": 0.611344039440155,
"eval_runtime": 21.0579,
"eval_samples_per_second": 9.688,
"eval_steps_per_second": 0.332,
"step": 360
},
{
"epoch": 0.41450777202072536,
"grad_norm": 0.5714089870452881,
"learning_rate": 0.00029998000408766265,
"loss": 1.1918,
"step": 400
},
{
"epoch": 0.41450777202072536,
"eval_loss": 0.605933666229248,
"eval_runtime": 21.0463,
"eval_samples_per_second": 9.693,
"eval_steps_per_second": 0.333,
"step": 400
},
{
"epoch": 0.45595854922279794,
"eval_loss": 0.6033980846405029,
"eval_runtime": 21.0016,
"eval_samples_per_second": 9.714,
"eval_steps_per_second": 0.333,
"step": 440
},
{
"epoch": 0.49740932642487046,
"eval_loss": 0.6009693741798401,
"eval_runtime": 20.9877,
"eval_samples_per_second": 9.72,
"eval_steps_per_second": 0.334,
"step": 480
},
{
"epoch": 0.5181347150259067,
"grad_norm": 0.7262830138206482,
"learning_rate": 0.00029996080884775884,
"loss": 1.2162,
"step": 500
},
{
"epoch": 0.538860103626943,
"eval_loss": 0.6007779836654663,
"eval_runtime": 21.0336,
"eval_samples_per_second": 9.699,
"eval_steps_per_second": 0.333,
"step": 520
},
{
"epoch": 0.5803108808290155,
"eval_loss": 0.5946472883224487,
"eval_runtime": 20.9844,
"eval_samples_per_second": 9.721,
"eval_steps_per_second": 0.334,
"step": 560
},
{
"epoch": 0.6217616580310881,
"grad_norm": 0.95758455991745,
"learning_rate": 0.0002999352164683305,
"loss": 1.1867,
"step": 600
},
{
"epoch": 0.6217616580310881,
"eval_loss": 0.5968028903007507,
"eval_runtime": 21.0097,
"eval_samples_per_second": 9.71,
"eval_steps_per_second": 0.333,
"step": 600
},
{
"epoch": 0.6632124352331606,
"eval_loss": 0.5911960005760193,
"eval_runtime": 20.987,
"eval_samples_per_second": 9.72,
"eval_steps_per_second": 0.334,
"step": 640
},
{
"epoch": 0.7046632124352331,
"eval_loss": 0.5886518955230713,
"eval_runtime": 21.0007,
"eval_samples_per_second": 9.714,
"eval_steps_per_second": 0.333,
"step": 680
},
{
"epoch": 0.7253886010362695,
"grad_norm": 0.5711341500282288,
"learning_rate": 0.00029990322804111623,
"loss": 1.1763,
"step": 700
},
{
"epoch": 0.7461139896373057,
"eval_loss": 0.5855283141136169,
"eval_runtime": 21.0019,
"eval_samples_per_second": 9.713,
"eval_steps_per_second": 0.333,
"step": 720
},
{
"epoch": 0.7875647668393783,
"eval_loss": 0.5841448903083801,
"eval_runtime": 20.994,
"eval_samples_per_second": 9.717,
"eval_steps_per_second": 0.333,
"step": 760
},
{
"epoch": 0.8290155440414507,
"grad_norm": 0.7626607418060303,
"learning_rate": 0.00029986484493070223,
"loss": 1.1783,
"step": 800
},
{
"epoch": 0.8290155440414507,
"eval_loss": 0.5830583572387695,
"eval_runtime": 21.0001,
"eval_samples_per_second": 9.714,
"eval_steps_per_second": 0.333,
"step": 800
},
{
"epoch": 0.8704663212435233,
"eval_loss": 0.5801699757575989,
"eval_runtime": 21.0085,
"eval_samples_per_second": 9.71,
"eval_steps_per_second": 0.333,
"step": 840
},
{
"epoch": 0.9119170984455959,
"eval_loss": 0.5773412585258484,
"eval_runtime": 21.0062,
"eval_samples_per_second": 9.711,
"eval_steps_per_second": 0.333,
"step": 880
},
{
"epoch": 0.9326424870466321,
"grad_norm": 0.8522987961769104,
"learning_rate": 0.00029982006877446357,
"loss": 1.1443,
"step": 900
},
{
"epoch": 0.9533678756476683,
"eval_loss": 0.5795757174491882,
"eval_runtime": 21.0572,
"eval_samples_per_second": 9.688,
"eval_steps_per_second": 0.332,
"step": 920
},
{
"epoch": 0.9948186528497409,
"eval_loss": 0.5778769254684448,
"eval_runtime": 20.9986,
"eval_samples_per_second": 9.715,
"eval_steps_per_second": 0.333,
"step": 960
},
{
"epoch": 1.0362694300518134,
"grad_norm": 0.8339794278144836,
"learning_rate": 0.000299768901482495,
"loss": 1.0328,
"step": 1000
},
{
"epoch": 1.0362694300518134,
"eval_loss": 0.5930935740470886,
"eval_runtime": 21.0358,
"eval_samples_per_second": 9.698,
"eval_steps_per_second": 0.333,
"step": 1000
}
],
"logging_steps": 100,
"max_steps": 48250,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 40,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.258924645482496e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}