lesso18's picture
Training in progress, step 200, checkpoint
e6fa877 verified
{
"best_metric": 2.153864622116089,
"best_model_checkpoint": "miner_id_24/checkpoint-50",
"epoch": 0.04828002414001207,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00024140012070006034,
"eval_loss": 2.989262819290161,
"eval_runtime": 100.8385,
"eval_samples_per_second": 17.305,
"eval_steps_per_second": 4.334,
"step": 1
},
{
"epoch": 0.0024140012070006035,
"grad_norm": 5.14186954498291,
"learning_rate": 4.36e-05,
"loss": 2.1011,
"step": 10
},
{
"epoch": 0.004828002414001207,
"grad_norm": 7.176346778869629,
"learning_rate": 8.72e-05,
"loss": 2.2516,
"step": 20
},
{
"epoch": 0.00724200362100181,
"grad_norm": 6.447731971740723,
"learning_rate": 0.0001308,
"loss": 2.0652,
"step": 30
},
{
"epoch": 0.009656004828002414,
"grad_norm": 6.7839179039001465,
"learning_rate": 0.0001744,
"loss": 2.0828,
"step": 40
},
{
"epoch": 0.012070006035003017,
"grad_norm": 7.772573947906494,
"learning_rate": 0.000218,
"loss": 2.3877,
"step": 50
},
{
"epoch": 0.012070006035003017,
"eval_loss": 2.153864622116089,
"eval_runtime": 100.9581,
"eval_samples_per_second": 17.284,
"eval_steps_per_second": 4.329,
"step": 50
},
{
"epoch": 0.01448400724200362,
"grad_norm": 3.764359712600708,
"learning_rate": 0.00021773448147832086,
"loss": 1.971,
"step": 60
},
{
"epoch": 0.016898008449004225,
"grad_norm": 5.581189155578613,
"learning_rate": 0.0002169392194928312,
"loss": 1.9339,
"step": 70
},
{
"epoch": 0.01931200965600483,
"grad_norm": 5.161837100982666,
"learning_rate": 0.00021561808847998484,
"loss": 2.2624,
"step": 80
},
{
"epoch": 0.02172601086300543,
"grad_norm": 4.681901454925537,
"learning_rate": 0.00021377752485727676,
"loss": 1.9126,
"step": 90
},
{
"epoch": 0.024140012070006035,
"grad_norm": 13.321852684020996,
"learning_rate": 0.00021142649566566402,
"loss": 2.2267,
"step": 100
},
{
"epoch": 0.024140012070006035,
"eval_loss": 2.305248737335205,
"eval_runtime": 100.7966,
"eval_samples_per_second": 17.312,
"eval_steps_per_second": 4.335,
"step": 100
},
{
"epoch": 0.026554013277006638,
"grad_norm": 4.219131946563721,
"learning_rate": 0.0002085764548830435,
"loss": 1.8166,
"step": 110
},
{
"epoch": 0.02896801448400724,
"grad_norm": 3.56708025932312,
"learning_rate": 0.00020524128762162305,
"loss": 2.1941,
"step": 120
},
{
"epoch": 0.03138201569100785,
"grad_norm": 6.397162437438965,
"learning_rate": 0.00020143724248105043,
"loss": 2.0798,
"step": 130
},
{
"epoch": 0.03379601689800845,
"grad_norm": 5.533897876739502,
"learning_rate": 0.0001971828523868693,
"loss": 1.9979,
"step": 140
},
{
"epoch": 0.036210018105009054,
"grad_norm": 7.768519878387451,
"learning_rate": 0.0001924988442999686,
"loss": 2.1741,
"step": 150
},
{
"epoch": 0.036210018105009054,
"eval_loss": 2.3202924728393555,
"eval_runtime": 100.8543,
"eval_samples_per_second": 17.302,
"eval_steps_per_second": 4.333,
"step": 150
},
{
"epoch": 0.03862401931200966,
"grad_norm": 4.070739269256592,
"learning_rate": 0.00018740803823691298,
"loss": 2.1066,
"step": 160
},
{
"epoch": 0.04103802051901026,
"grad_norm": 4.031443119049072,
"learning_rate": 0.00018193523609311556,
"loss": 2.1219,
"step": 170
},
{
"epoch": 0.04345202172601086,
"grad_norm": 5.493770599365234,
"learning_rate": 0.00017610710081049675,
"loss": 1.9506,
"step": 180
},
{
"epoch": 0.045866022933011466,
"grad_norm": 4.385707378387451,
"learning_rate": 0.00016995202647831142,
"loss": 2.0755,
"step": 190
},
{
"epoch": 0.04828002414001207,
"grad_norm": 8.468510627746582,
"learning_rate": 0.00016350000000000002,
"loss": 2.2808,
"step": 200
},
{
"epoch": 0.04828002414001207,
"eval_loss": 2.17218017578125,
"eval_runtime": 100.7675,
"eval_samples_per_second": 17.317,
"eval_steps_per_second": 4.337,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.32624144891904e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}