{ "best_metric": 1.2871240377426147, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.008165931732810714, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016331863465621427, "grad_norm": 1.671673059463501, "learning_rate": 1.001e-05, "loss": 1.0081, "step": 1 }, { "epoch": 0.00016331863465621427, "eval_loss": 1.9000940322875977, "eval_runtime": 345.3153, "eval_samples_per_second": 7.466, "eval_steps_per_second": 1.868, "step": 1 }, { "epoch": 0.00032663726931242854, "grad_norm": 2.1853973865509033, "learning_rate": 2.002e-05, "loss": 1.0249, "step": 2 }, { "epoch": 0.0004899559039686428, "grad_norm": 2.061541795730591, "learning_rate": 3.0029999999999995e-05, "loss": 1.0868, "step": 3 }, { "epoch": 0.0006532745386248571, "grad_norm": 2.0026113986968994, "learning_rate": 4.004e-05, "loss": 1.2093, "step": 4 }, { "epoch": 0.0008165931732810714, "grad_norm": 1.3478747606277466, "learning_rate": 5.005e-05, "loss": 0.9586, "step": 5 }, { "epoch": 0.0009799118079372856, "grad_norm": 1.2198477983474731, "learning_rate": 6.005999999999999e-05, "loss": 1.001, "step": 6 }, { "epoch": 0.0011432304425935, "grad_norm": 1.0008301734924316, "learning_rate": 7.006999999999998e-05, "loss": 1.0274, "step": 7 }, { "epoch": 0.0013065490772497142, "grad_norm": 0.9373151063919067, "learning_rate": 8.008e-05, "loss": 1.0383, "step": 8 }, { "epoch": 0.0014698677119059284, "grad_norm": 0.984042763710022, "learning_rate": 9.009e-05, "loss": 0.9748, "step": 9 }, { "epoch": 0.0016331863465621427, "grad_norm": 0.9123459458351135, "learning_rate": 0.0001001, "loss": 1.0147, "step": 10 }, { "epoch": 0.001796504981218357, "grad_norm": 0.941821277141571, "learning_rate": 9.957315789473684e-05, "loss": 0.8924, "step": 11 }, { "epoch": 0.0019598236158745713, "grad_norm": 0.8732932209968567, "learning_rate": 9.904631578947367e-05, "loss": 0.9978, "step": 12 }, { "epoch": 0.0021231422505307855, "grad_norm": 0.8224309086799622, "learning_rate": 9.851947368421052e-05, "loss": 1.064, "step": 13 }, { "epoch": 0.002286460885187, "grad_norm": 0.9332011342048645, "learning_rate": 9.799263157894736e-05, "loss": 1.0589, "step": 14 }, { "epoch": 0.002449779519843214, "grad_norm": 0.9349180459976196, "learning_rate": 9.746578947368421e-05, "loss": 1.1028, "step": 15 }, { "epoch": 0.0026130981544994283, "grad_norm": 0.8387799263000488, "learning_rate": 9.693894736842104e-05, "loss": 1.0083, "step": 16 }, { "epoch": 0.0027764167891556426, "grad_norm": 0.8592500686645508, "learning_rate": 9.641210526315789e-05, "loss": 1.0184, "step": 17 }, { "epoch": 0.002939735423811857, "grad_norm": 0.8980984687805176, "learning_rate": 9.588526315789473e-05, "loss": 0.937, "step": 18 }, { "epoch": 0.003103054058468071, "grad_norm": 0.8308787941932678, "learning_rate": 9.535842105263157e-05, "loss": 0.9237, "step": 19 }, { "epoch": 0.0032663726931242854, "grad_norm": 1.0007582902908325, "learning_rate": 9.483157894736841e-05, "loss": 1.0406, "step": 20 }, { "epoch": 0.0034296913277804997, "grad_norm": 1.081726312637329, "learning_rate": 9.430473684210526e-05, "loss": 1.007, "step": 21 }, { "epoch": 0.003593009962436714, "grad_norm": 0.9985299110412598, "learning_rate": 9.37778947368421e-05, "loss": 1.0914, "step": 22 }, { "epoch": 0.0037563285970929282, "grad_norm": 1.0678647756576538, "learning_rate": 9.325105263157894e-05, "loss": 1.2774, "step": 23 }, { "epoch": 0.0039196472317491425, "grad_norm": 1.0457429885864258, 
"learning_rate": 9.272421052631578e-05, "loss": 0.9798, "step": 24 }, { "epoch": 0.004082965866405357, "grad_norm": 1.1700959205627441, "learning_rate": 9.219736842105263e-05, "loss": 1.2144, "step": 25 }, { "epoch": 0.004246284501061571, "grad_norm": 1.3236618041992188, "learning_rate": 9.167052631578946e-05, "loss": 1.1766, "step": 26 }, { "epoch": 0.004409603135717786, "grad_norm": 1.2073756456375122, "learning_rate": 9.114368421052632e-05, "loss": 0.8459, "step": 27 }, { "epoch": 0.004572921770374, "grad_norm": 1.3933292627334595, "learning_rate": 9.061684210526315e-05, "loss": 1.0467, "step": 28 }, { "epoch": 0.004736240405030214, "grad_norm": 1.4482253789901733, "learning_rate": 9.009e-05, "loss": 1.1401, "step": 29 }, { "epoch": 0.004899559039686428, "grad_norm": 1.365537405014038, "learning_rate": 8.956315789473683e-05, "loss": 0.9932, "step": 30 }, { "epoch": 0.005062877674342643, "grad_norm": 1.475220799446106, "learning_rate": 8.903631578947368e-05, "loss": 1.2499, "step": 31 }, { "epoch": 0.005226196308998857, "grad_norm": 1.531992793083191, "learning_rate": 8.850947368421052e-05, "loss": 1.0115, "step": 32 }, { "epoch": 0.005389514943655071, "grad_norm": 2.305574655532837, "learning_rate": 8.798263157894736e-05, "loss": 0.9368, "step": 33 }, { "epoch": 0.005552833578311285, "grad_norm": 1.7354161739349365, "learning_rate": 8.745578947368422e-05, "loss": 1.049, "step": 34 }, { "epoch": 0.0057161522129675, "grad_norm": 2.7026562690734863, "learning_rate": 8.692894736842105e-05, "loss": 1.3359, "step": 35 }, { "epoch": 0.005879470847623714, "grad_norm": 2.1211535930633545, "learning_rate": 8.64021052631579e-05, "loss": 1.2523, "step": 36 }, { "epoch": 0.0060427894822799285, "grad_norm": 2.5288071632385254, "learning_rate": 8.587526315789473e-05, "loss": 1.43, "step": 37 }, { "epoch": 0.006206108116936142, "grad_norm": 2.433478832244873, "learning_rate": 8.534842105263157e-05, "loss": 1.214, "step": 38 }, { "epoch": 0.006369426751592357, "grad_norm": 2.750303268432617, "learning_rate": 8.482157894736842e-05, "loss": 1.0099, "step": 39 }, { "epoch": 0.006532745386248571, "grad_norm": 2.981351375579834, "learning_rate": 8.429473684210525e-05, "loss": 1.6271, "step": 40 }, { "epoch": 0.0066960640209047856, "grad_norm": 3.7224624156951904, "learning_rate": 8.376789473684211e-05, "loss": 1.733, "step": 41 }, { "epoch": 0.006859382655560999, "grad_norm": 3.8278751373291016, "learning_rate": 8.324105263157894e-05, "loss": 1.5315, "step": 42 }, { "epoch": 0.007022701290217214, "grad_norm": 2.7715137004852295, "learning_rate": 8.271421052631579e-05, "loss": 1.103, "step": 43 }, { "epoch": 0.007186019924873428, "grad_norm": 4.1672892570495605, "learning_rate": 8.218736842105262e-05, "loss": 1.6296, "step": 44 }, { "epoch": 0.007349338559529643, "grad_norm": 4.840080261230469, "learning_rate": 8.166052631578947e-05, "loss": 1.3456, "step": 45 }, { "epoch": 0.0075126571941858565, "grad_norm": 4.377506256103516, "learning_rate": 8.113368421052631e-05, "loss": 1.6744, "step": 46 }, { "epoch": 0.007675975828842071, "grad_norm": 4.180212497711182, "learning_rate": 8.060684210526315e-05, "loss": 1.4903, "step": 47 }, { "epoch": 0.007839294463498285, "grad_norm": 4.473518371582031, "learning_rate": 8.008e-05, "loss": 2.0731, "step": 48 }, { "epoch": 0.008002613098154499, "grad_norm": 7.101559162139893, "learning_rate": 7.955315789473684e-05, "loss": 2.0784, "step": 49 }, { "epoch": 0.008165931732810714, "grad_norm": 12.615863800048828, "learning_rate": 7.902631578947368e-05, "loss": 3.4556, "step": 
50 }, { "epoch": 0.008165931732810714, "eval_loss": 1.2871240377426147, "eval_runtime": 345.3086, "eval_samples_per_second": 7.466, "eval_steps_per_second": 1.868, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.1806176530333696e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }
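For orientation, a minimal inspection sketch follows, assuming the JSON above is saved as the Trainer's trainer_state.json inside the checkpoint directory named by best_model_checkpoint; the file path and the printed summary are illustrative assumptions, not part of the file itself. It uses only the standard-library json module and the fields visible above.

# Minimal sketch: load the trainer state and summarize the logged losses.
# The path below is an assumption based on best_model_checkpoint.
import json

with open("miner_id_24/checkpoint-50/trainer_state.json") as f:  # assumed location
    state = json.load(f)

print("best eval_loss:", state["best_metric"])              # 1.2871240377426147
print("best checkpoint:", state["best_model_checkpoint"])   # miner_id_24/checkpoint-50

# log_history mixes per-step training records ("loss", "grad_norm", "learning_rate")
# with periodic evaluation records ("eval_loss", every eval_steps=50 steps); split them.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

last = train_log[-1]
print(f"step {last['step']}: train loss {last['loss']}, grad_norm {last['grad_norm']}")
for e in eval_log:
    print(f"step {e['step']}: eval_loss {e['eval_loss']}")

Read this way, the state records eval_loss dropping from 1.9000940322875977 at step 1 to 1.2871240377426147 at step 50, which is why best_model_checkpoint points at checkpoint-50, while the final training record shows loss 3.4556 with grad_norm 12.615863800048828; early stopping (patience 5, counter 0) has not triggered, and training is capped at max_steps 200.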