|
{ |
|
"best_metric": 2.2613911628723145, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.6745362563237775, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013490725126475547, |
|
"grad_norm": 85.44503784179688, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6571, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.013490725126475547, |
|
"eval_loss": 3.286269187927246, |
|
"eval_runtime": 33.0276, |
|
"eval_samples_per_second": 15.139, |
|
"eval_steps_per_second": 1.907, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.026981450252951095, |
|
"grad_norm": 95.18113708496094, |
|
"learning_rate": 0.0001, |
|
"loss": 2.9702, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.04047217537942664, |
|
"grad_norm": 77.65520477294922, |
|
"learning_rate": 9.989294616193017e-05, |
|
"loss": 2.7021, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.05396290050590219, |
|
"grad_norm": 135.69296264648438, |
|
"learning_rate": 9.957224306869053e-05, |
|
"loss": 2.7404, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.06745362563237774, |
|
"grad_norm": 84.60151672363281, |
|
"learning_rate": 9.903926402016153e-05, |
|
"loss": 2.6488, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08094435075885328, |
|
"grad_norm": 80.55194091796875, |
|
"learning_rate": 9.829629131445342e-05, |
|
"loss": 2.6615, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.09443507588532883, |
|
"grad_norm": 108.89022827148438, |
|
"learning_rate": 9.73465064747553e-05, |
|
"loss": 2.6358, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.10792580101180438, |
|
"grad_norm": 79.09467315673828, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 2.5546, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.12141652613827993, |
|
"grad_norm": 87.56582641601562, |
|
"learning_rate": 9.484363707663442e-05, |
|
"loss": 2.7197, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.13490725126475547, |
|
"grad_norm": 93.86714172363281, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 2.4651, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14839797639123103, |
|
"grad_norm": 41.895912170410156, |
|
"learning_rate": 9.157348061512727e-05, |
|
"loss": 2.5984, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.16188870151770657, |
|
"grad_norm": 86.19235229492188, |
|
"learning_rate": 8.966766701456177e-05, |
|
"loss": 2.5576, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.17537942664418213, |
|
"grad_norm": 76.61930847167969, |
|
"learning_rate": 8.759199037394887e-05, |
|
"loss": 2.6488, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.18887015177065766, |
|
"grad_norm": 58.94124984741211, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 2.3962, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.20236087689713322, |
|
"grad_norm": 61.75034713745117, |
|
"learning_rate": 8.296729075500344e-05, |
|
"loss": 2.4865, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.21585160202360876, |
|
"grad_norm": 69.10595703125, |
|
"learning_rate": 8.043807145043604e-05, |
|
"loss": 2.3925, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.22934232715008432, |
|
"grad_norm": 58.51557159423828, |
|
"learning_rate": 7.777851165098012e-05, |
|
"loss": 2.5001, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.24283305227655985, |
|
"grad_norm": 40.6702880859375, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 2.4163, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2563237774030354, |
|
"grad_norm": 50.67131042480469, |
|
"learning_rate": 7.211443451095007e-05, |
|
"loss": 2.2799, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.26981450252951095, |
|
"grad_norm": 54.50019836425781, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 2.3563, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.28330522765598654, |
|
"grad_norm": 35.64760208129883, |
|
"learning_rate": 6.607197326515808e-05, |
|
"loss": 2.2526, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.29679595278246207, |
|
"grad_norm": 54.67503356933594, |
|
"learning_rate": 6.294095225512603e-05, |
|
"loss": 2.4552, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3102866779089376, |
|
"grad_norm": 79.10367584228516, |
|
"learning_rate": 5.9754516100806423e-05, |
|
"loss": 2.4652, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.32377740303541314, |
|
"grad_norm": 67.79601287841797, |
|
"learning_rate": 5.6526309611002594e-05, |
|
"loss": 2.2297, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.3372681281618887, |
|
"grad_norm": 49.42949295043945, |
|
"learning_rate": 5.327015646150716e-05, |
|
"loss": 2.3913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3372681281618887, |
|
"eval_loss": 2.9423482418060303, |
|
"eval_runtime": 32.9894, |
|
"eval_samples_per_second": 15.156, |
|
"eval_steps_per_second": 1.91, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.35075885328836426, |
|
"grad_norm": 327.58599853515625, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0293, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.3642495784148398, |
|
"grad_norm": 362.6495056152344, |
|
"learning_rate": 4.6729843538492847e-05, |
|
"loss": 3.3553, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.3777403035413153, |
|
"grad_norm": 316.2618103027344, |
|
"learning_rate": 4.347369038899744e-05, |
|
"loss": 2.9598, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3912310286677909, |
|
"grad_norm": 237.7189178466797, |
|
"learning_rate": 4.0245483899193595e-05, |
|
"loss": 2.7075, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.40472175379426645, |
|
"grad_norm": 114.92796325683594, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 2.4807, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.418212478920742, |
|
"grad_norm": 48.19389724731445, |
|
"learning_rate": 3.392802673484193e-05, |
|
"loss": 2.4164, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.4317032040472175, |
|
"grad_norm": 71.13121032714844, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 2.3391, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.4451939291736931, |
|
"grad_norm": 78.83213806152344, |
|
"learning_rate": 2.7885565489049946e-05, |
|
"loss": 2.2943, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.45868465430016864, |
|
"grad_norm": 71.80233001708984, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 2.3639, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.47217537942664417, |
|
"grad_norm": 74.17680358886719, |
|
"learning_rate": 2.2221488349019903e-05, |
|
"loss": 2.4181, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.4856661045531197, |
|
"grad_norm": 69.19786071777344, |
|
"learning_rate": 1.9561928549563968e-05, |
|
"loss": 2.2874, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.4991568296795953, |
|
"grad_norm": 66.0870132446289, |
|
"learning_rate": 1.703270924499656e-05, |
|
"loss": 2.34, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.5126475548060708, |
|
"grad_norm": 43.74034881591797, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 2.3367, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5261382799325464, |
|
"grad_norm": 63.72829055786133, |
|
"learning_rate": 1.2408009626051137e-05, |
|
"loss": 2.2909, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.5396290050590219, |
|
"grad_norm": 79.226318359375, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 2.2279, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5531197301854974, |
|
"grad_norm": 74.6691665649414, |
|
"learning_rate": 8.426519384872733e-06, |
|
"loss": 2.232, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.5666104553119731, |
|
"grad_norm": 71.57418823242188, |
|
"learning_rate": 6.698729810778065e-06, |
|
"loss": 2.1768, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.5801011804384486, |
|
"grad_norm": 63.76176452636719, |
|
"learning_rate": 5.156362923365588e-06, |
|
"loss": 2.3299, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.5935919055649241, |
|
"grad_norm": 51.7007942199707, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 2.2503, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.6070826306913997, |
|
"grad_norm": 39.83833694458008, |
|
"learning_rate": 2.653493525244721e-06, |
|
"loss": 2.3118, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6205733558178752, |
|
"grad_norm": 38.03684616088867, |
|
"learning_rate": 1.70370868554659e-06, |
|
"loss": 2.3047, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.6340640809443507, |
|
"grad_norm": 34.288970947265625, |
|
"learning_rate": 9.607359798384785e-07, |
|
"loss": 2.2329, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.6475548060708263, |
|
"grad_norm": 31.958253860473633, |
|
"learning_rate": 4.277569313094809e-07, |
|
"loss": 2.2962, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.6610455311973018, |
|
"grad_norm": 39.0740966796875, |
|
"learning_rate": 1.0705383806982606e-07, |
|
"loss": 2.3091, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.6745362563237775, |
|
"grad_norm": 87.91134643554688, |
|
"learning_rate": 0.0, |
|
"loss": 2.4479, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6745362563237775, |
|
"eval_loss": 2.2613911628723145, |
|
"eval_runtime": 33.0075, |
|
"eval_samples_per_second": 15.148, |
|
"eval_steps_per_second": 1.909, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.658027780734976e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|