|
{ |
|
"best_metric": 1.2871240377426147, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.008165931732810714, |
|
"eval_steps": 50, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00016331863465621427, |
|
"grad_norm": 1.671673059463501, |
|
"learning_rate": 1.001e-05, |
|
"loss": 1.0081, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00016331863465621427, |
|
"eval_loss": 1.9000940322875977, |
|
"eval_runtime": 345.3153, |
|
"eval_samples_per_second": 7.466, |
|
"eval_steps_per_second": 1.868, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00032663726931242854, |
|
"grad_norm": 2.1853973865509033, |
|
"learning_rate": 2.002e-05, |
|
"loss": 1.0249, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0004899559039686428, |
|
"grad_norm": 2.061541795730591, |
|
"learning_rate": 3.0029999999999995e-05, |
|
"loss": 1.0868, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006532745386248571, |
|
"grad_norm": 2.0026113986968994, |
|
"learning_rate": 4.004e-05, |
|
"loss": 1.2093, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0008165931732810714, |
|
"grad_norm": 1.3478747606277466, |
|
"learning_rate": 5.005e-05, |
|
"loss": 0.9586, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0009799118079372856, |
|
"grad_norm": 1.2198477983474731, |
|
"learning_rate": 6.005999999999999e-05, |
|
"loss": 1.001, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0011432304425935, |
|
"grad_norm": 1.0008301734924316, |
|
"learning_rate": 7.006999999999998e-05, |
|
"loss": 1.0274, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0013065490772497142, |
|
"grad_norm": 0.9373151063919067, |
|
"learning_rate": 8.008e-05, |
|
"loss": 1.0383, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0014698677119059284, |
|
"grad_norm": 0.984042763710022, |
|
"learning_rate": 9.009e-05, |
|
"loss": 0.9748, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0016331863465621427, |
|
"grad_norm": 0.9123459458351135, |
|
"learning_rate": 0.0001001, |
|
"loss": 1.0147, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.001796504981218357, |
|
"grad_norm": 0.941821277141571, |
|
"learning_rate": 9.957315789473684e-05, |
|
"loss": 0.8924, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0019598236158745713, |
|
"grad_norm": 0.8732932209968567, |
|
"learning_rate": 9.904631578947367e-05, |
|
"loss": 0.9978, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0021231422505307855, |
|
"grad_norm": 0.8224309086799622, |
|
"learning_rate": 9.851947368421052e-05, |
|
"loss": 1.064, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002286460885187, |
|
"grad_norm": 0.9332011342048645, |
|
"learning_rate": 9.799263157894736e-05, |
|
"loss": 1.0589, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.002449779519843214, |
|
"grad_norm": 0.9349180459976196, |
|
"learning_rate": 9.746578947368421e-05, |
|
"loss": 1.1028, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0026130981544994283, |
|
"grad_norm": 0.8387799263000488, |
|
"learning_rate": 9.693894736842104e-05, |
|
"loss": 1.0083, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0027764167891556426, |
|
"grad_norm": 0.8592500686645508, |
|
"learning_rate": 9.641210526315789e-05, |
|
"loss": 1.0184, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.002939735423811857, |
|
"grad_norm": 0.8980984687805176, |
|
"learning_rate": 9.588526315789473e-05, |
|
"loss": 0.937, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.003103054058468071, |
|
"grad_norm": 0.8308787941932678, |
|
"learning_rate": 9.535842105263157e-05, |
|
"loss": 0.9237, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0032663726931242854, |
|
"grad_norm": 1.0007582902908325, |
|
"learning_rate": 9.483157894736841e-05, |
|
"loss": 1.0406, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0034296913277804997, |
|
"grad_norm": 1.081726312637329, |
|
"learning_rate": 9.430473684210526e-05, |
|
"loss": 1.007, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.003593009962436714, |
|
"grad_norm": 0.9985299110412598, |
|
"learning_rate": 9.37778947368421e-05, |
|
"loss": 1.0914, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0037563285970929282, |
|
"grad_norm": 1.0678647756576538, |
|
"learning_rate": 9.325105263157894e-05, |
|
"loss": 1.2774, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0039196472317491425, |
|
"grad_norm": 1.0457429885864258, |
|
"learning_rate": 9.272421052631578e-05, |
|
"loss": 0.9798, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.004082965866405357, |
|
"grad_norm": 1.1700959205627441, |
|
"learning_rate": 9.219736842105263e-05, |
|
"loss": 1.2144, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004246284501061571, |
|
"grad_norm": 1.3236618041992188, |
|
"learning_rate": 9.167052631578946e-05, |
|
"loss": 1.1766, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004409603135717786, |
|
"grad_norm": 1.2073756456375122, |
|
"learning_rate": 9.114368421052632e-05, |
|
"loss": 0.8459, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004572921770374, |
|
"grad_norm": 1.3933292627334595, |
|
"learning_rate": 9.061684210526315e-05, |
|
"loss": 1.0467, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.004736240405030214, |
|
"grad_norm": 1.4482253789901733, |
|
"learning_rate": 9.009e-05, |
|
"loss": 1.1401, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.004899559039686428, |
|
"grad_norm": 1.365537405014038, |
|
"learning_rate": 8.956315789473683e-05, |
|
"loss": 0.9932, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005062877674342643, |
|
"grad_norm": 1.475220799446106, |
|
"learning_rate": 8.903631578947368e-05, |
|
"loss": 1.2499, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.005226196308998857, |
|
"grad_norm": 1.531992793083191, |
|
"learning_rate": 8.850947368421052e-05, |
|
"loss": 1.0115, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.005389514943655071, |
|
"grad_norm": 2.305574655532837, |
|
"learning_rate": 8.798263157894736e-05, |
|
"loss": 0.9368, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.005552833578311285, |
|
"grad_norm": 1.7354161739349365, |
|
"learning_rate": 8.745578947368422e-05, |
|
"loss": 1.049, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0057161522129675, |
|
"grad_norm": 2.7026562690734863, |
|
"learning_rate": 8.692894736842105e-05, |
|
"loss": 1.3359, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.005879470847623714, |
|
"grad_norm": 2.1211535930633545, |
|
"learning_rate": 8.64021052631579e-05, |
|
"loss": 1.2523, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0060427894822799285, |
|
"grad_norm": 2.5288071632385254, |
|
"learning_rate": 8.587526315789473e-05, |
|
"loss": 1.43, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.006206108116936142, |
|
"grad_norm": 2.433478832244873, |
|
"learning_rate": 8.534842105263157e-05, |
|
"loss": 1.214, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.006369426751592357, |
|
"grad_norm": 2.750303268432617, |
|
"learning_rate": 8.482157894736842e-05, |
|
"loss": 1.0099, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.006532745386248571, |
|
"grad_norm": 2.981351375579834, |
|
"learning_rate": 8.429473684210525e-05, |
|
"loss": 1.6271, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0066960640209047856, |
|
"grad_norm": 3.7224624156951904, |
|
"learning_rate": 8.376789473684211e-05, |
|
"loss": 1.733, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.006859382655560999, |
|
"grad_norm": 3.8278751373291016, |
|
"learning_rate": 8.324105263157894e-05, |
|
"loss": 1.5315, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.007022701290217214, |
|
"grad_norm": 2.7715137004852295, |
|
"learning_rate": 8.271421052631579e-05, |
|
"loss": 1.103, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.007186019924873428, |
|
"grad_norm": 4.1672892570495605, |
|
"learning_rate": 8.218736842105262e-05, |
|
"loss": 1.6296, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.007349338559529643, |
|
"grad_norm": 4.840080261230469, |
|
"learning_rate": 8.166052631578947e-05, |
|
"loss": 1.3456, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0075126571941858565, |
|
"grad_norm": 4.377506256103516, |
|
"learning_rate": 8.113368421052631e-05, |
|
"loss": 1.6744, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.007675975828842071, |
|
"grad_norm": 4.180212497711182, |
|
"learning_rate": 8.060684210526315e-05, |
|
"loss": 1.4903, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.007839294463498285, |
|
"grad_norm": 4.473518371582031, |
|
"learning_rate": 8.008e-05, |
|
"loss": 2.0731, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008002613098154499, |
|
"grad_norm": 7.101559162139893, |
|
"learning_rate": 7.955315789473684e-05, |
|
"loss": 2.0784, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.008165931732810714, |
|
"grad_norm": 12.615863800048828, |
|
"learning_rate": 7.902631578947368e-05, |
|
"loss": 3.4556, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008165931732810714, |
|
"eval_loss": 1.2871240377426147, |
|
"eval_runtime": 345.3086, |
|
"eval_samples_per_second": 7.466, |
|
"eval_steps_per_second": 1.868, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1806176530333696e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|