|
{ |
|
"best_metric": 1.4567363262176514, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-400", |
|
"epoch": 0.5009230769230769, |
|
"eval_steps": 50, |
|
"global_step": 407, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0012307692307692308, |
|
"eval_loss": 1.7939045429229736, |
|
"eval_runtime": 27.5844, |
|
"eval_samples_per_second": 6.235, |
|
"eval_steps_per_second": 1.559, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012307692307692308, |
|
"grad_norm": 0.5439819097518921, |
|
"learning_rate": 4.2600000000000005e-05, |
|
"loss": 1.6815, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024615384615384615, |
|
"grad_norm": 0.6933808922767639, |
|
"learning_rate": 8.520000000000001e-05, |
|
"loss": 1.711, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.036923076923076927, |
|
"grad_norm": 0.6192981600761414, |
|
"learning_rate": 0.0001278, |
|
"loss": 1.5149, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04923076923076923, |
|
"grad_norm": 0.6547725200653076, |
|
"learning_rate": 0.00017040000000000002, |
|
"loss": 1.5514, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"grad_norm": 0.5849578976631165, |
|
"learning_rate": 0.000213, |
|
"loss": 1.6271, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"eval_loss": 1.5897082090377808, |
|
"eval_runtime": 27.3561, |
|
"eval_samples_per_second": 6.287, |
|
"eval_steps_per_second": 1.572, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07384615384615385, |
|
"grad_norm": 0.7208651304244995, |
|
"learning_rate": 0.00021258789997222896, |
|
"loss": 1.6167, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08615384615384615, |
|
"grad_norm": 0.6750473976135254, |
|
"learning_rate": 0.0002113547891177025, |
|
"loss": 1.6708, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09846153846153846, |
|
"grad_norm": 0.7576515674591064, |
|
"learning_rate": 0.00020931021044144212, |
|
"loss": 1.6455, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11076923076923077, |
|
"grad_norm": 0.6720758080482483, |
|
"learning_rate": 0.00020646998687169697, |
|
"loss": 1.5812, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"grad_norm": 0.86794513463974, |
|
"learning_rate": 0.00020285609880681382, |
|
"loss": 1.6535, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"eval_loss": 1.6505258083343506, |
|
"eval_runtime": 27.6521, |
|
"eval_samples_per_second": 6.22, |
|
"eval_steps_per_second": 1.555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13538461538461538, |
|
"grad_norm": 0.6903553009033203, |
|
"learning_rate": 0.00019849651400964533, |
|
"loss": 1.5973, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1476923076923077, |
|
"grad_norm": 0.6118985414505005, |
|
"learning_rate": 0.00019342497116593764, |
|
"loss": 1.5198, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.553017795085907, |
|
"learning_rate": 0.0001876807187817326, |
|
"loss": 1.573, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1723076923076923, |
|
"grad_norm": 0.5370417833328247, |
|
"learning_rate": 0.00018130821144044925, |
|
"loss": 1.5846, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 0.6245121955871582, |
|
"learning_rate": 0.00017435676577029992, |
|
"loss": 1.5261, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"eval_loss": 1.544323205947876, |
|
"eval_runtime": 27.299, |
|
"eval_samples_per_second": 6.301, |
|
"eval_steps_per_second": 1.575, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.19692307692307692, |
|
"grad_norm": 0.5361499786376953, |
|
"learning_rate": 0.00016688017878450004, |
|
"loss": 1.545, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.20923076923076922, |
|
"grad_norm": 0.5619227886199951, |
|
"learning_rate": 0.00015893631154792243, |
|
"loss": 1.5772, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.22153846153846155, |
|
"grad_norm": 0.6957938075065613, |
|
"learning_rate": 0.00015058664139218787, |
|
"loss": 1.6559, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.23384615384615384, |
|
"grad_norm": 0.6685720682144165, |
|
"learning_rate": 0.00014189578614458374, |
|
"loss": 1.5962, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 0.8288447856903076, |
|
"learning_rate": 0.00013293100405278884, |
|
"loss": 1.6537, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"eval_loss": 1.5585774183273315, |
|
"eval_runtime": 27.2807, |
|
"eval_samples_per_second": 6.305, |
|
"eval_steps_per_second": 1.576, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.25846153846153846, |
|
"grad_norm": 0.5318108797073364, |
|
"learning_rate": 0.0001237616732754696, |
|
"loss": 1.5638, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.27076923076923076, |
|
"grad_norm": 0.5150880217552185, |
|
"learning_rate": 0.0001144587549669542, |
|
"loss": 1.4456, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.28307692307692306, |
|
"grad_norm": 0.5536653399467468, |
|
"learning_rate": 0.00010509424411115418, |
|
"loss": 1.499, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2953846153846154, |
|
"grad_norm": 0.5529115200042725, |
|
"learning_rate": 9.574061235471257e-05, |
|
"loss": 1.4513, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 0.5359371304512024, |
|
"learning_rate": 8.647024715127413e-05, |
|
"loss": 1.5566, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"eval_loss": 1.494085669517517, |
|
"eval_runtime": 27.0653, |
|
"eval_samples_per_second": 6.355, |
|
"eval_steps_per_second": 1.589, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.5753491520881653, |
|
"learning_rate": 7.735489155732317e-05, |
|
"loss": 1.5377, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3323076923076923, |
|
"grad_norm": 0.5393943190574646, |
|
"learning_rate": 6.846508901498943e-05, |
|
"loss": 1.5158, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3446153846153846, |
|
"grad_norm": 0.5714206099510193, |
|
"learning_rate": 5.986963741863112e-05, |
|
"loss": 1.4651, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3569230769230769, |
|
"grad_norm": 0.5942085981369019, |
|
"learning_rate": 5.1635056690155563e-05, |
|
"loss": 1.5368, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"grad_norm": 0.7914677858352661, |
|
"learning_rate": 4.38250739834949e-05, |
|
"loss": 1.5954, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.36923076923076925, |
|
"eval_loss": 1.4894136190414429, |
|
"eval_runtime": 27.3264, |
|
"eval_samples_per_second": 6.294, |
|
"eval_steps_per_second": 1.574, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38153846153846155, |
|
"grad_norm": 0.4883591830730438, |
|
"learning_rate": 3.650013050222297e-05, |
|
"loss": 1.5128, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.39384615384615385, |
|
"grad_norm": 0.46697500348091125, |
|
"learning_rate": 2.971691374703558e-05, |
|
"loss": 1.4536, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.40615384615384614, |
|
"grad_norm": 0.476484090089798, |
|
"learning_rate": 2.3527918813016095e-05, |
|
"loss": 1.4595, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.41846153846153844, |
|
"grad_norm": 0.4911190867424011, |
|
"learning_rate": 1.7981042131792456e-05, |
|
"loss": 1.4094, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"grad_norm": 0.47842007875442505, |
|
"learning_rate": 1.3119210802601695e-05, |
|
"loss": 1.4179, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4307692307692308, |
|
"eval_loss": 1.4607428312301636, |
|
"eval_runtime": 27.3564, |
|
"eval_samples_per_second": 6.287, |
|
"eval_steps_per_second": 1.572, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4430769230769231, |
|
"grad_norm": 0.5161571502685547, |
|
"learning_rate": 8.980050380857488e-06, |
|
"loss": 1.573, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4553846153846154, |
|
"grad_norm": 0.5097351670265198, |
|
"learning_rate": 5.59559369519364e-06, |
|
"loss": 1.4451, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4676923076923077, |
|
"grad_norm": 0.5645511150360107, |
|
"learning_rate": 2.992032946439276e-06, |
|
"loss": 1.4605, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.5901855826377869, |
|
"learning_rate": 1.1895170070238124e-06, |
|
"loss": 1.5374, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 0.6915972828865051, |
|
"learning_rate": 2.019954895048029e-07, |
|
"loss": 1.6148, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"eval_loss": 1.4567363262176514, |
|
"eval_runtime": 27.6482, |
|
"eval_samples_per_second": 6.221, |
|
"eval_steps_per_second": 1.555, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 407, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.116721010222039e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|