|
{ |
|
"best_metric": 2.243114471435547, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-450", |
|
"epoch": 0.07436787307882994, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00016526194017517766, |
|
"eval_loss": 3.1149251461029053, |
|
"eval_runtime": 43.6347, |
|
"eval_samples_per_second": 58.394, |
|
"eval_steps_per_second": 14.598, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0016526194017517765, |
|
"grad_norm": 2.4044153690338135, |
|
"learning_rate": 4.36e-05, |
|
"loss": 2.4216, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003305238803503553, |
|
"grad_norm": 3.2215139865875244, |
|
"learning_rate": 8.72e-05, |
|
"loss": 2.2895, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.00495785820525533, |
|
"grad_norm": 4.437511444091797, |
|
"learning_rate": 0.0001308, |
|
"loss": 2.2952, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.006610477607007106, |
|
"grad_norm": 6.676159381866455, |
|
"learning_rate": 0.0001744, |
|
"loss": 2.2616, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008263097008758883, |
|
"grad_norm": 22.480907440185547, |
|
"learning_rate": 0.000218, |
|
"loss": 2.7042, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008263097008758883, |
|
"eval_loss": 2.7512803077697754, |
|
"eval_runtime": 43.5634, |
|
"eval_samples_per_second": 58.489, |
|
"eval_steps_per_second": 14.622, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00991571641051066, |
|
"grad_norm": 2.332223415374756, |
|
"learning_rate": 0.00021773448147832086, |
|
"loss": 2.3331, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.011568335812262435, |
|
"grad_norm": 2.4621620178222656, |
|
"learning_rate": 0.0002169392194928312, |
|
"loss": 2.3293, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.013220955214014212, |
|
"grad_norm": 3.450859546661377, |
|
"learning_rate": 0.00021561808847998484, |
|
"loss": 2.1836, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.014873574615765989, |
|
"grad_norm": 7.603415489196777, |
|
"learning_rate": 0.00021377752485727676, |
|
"loss": 2.2165, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.016526194017517766, |
|
"grad_norm": 19.09664535522461, |
|
"learning_rate": 0.00021142649566566402, |
|
"loss": 3.0109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016526194017517766, |
|
"eval_loss": 2.7991316318511963, |
|
"eval_runtime": 43.5731, |
|
"eval_samples_per_second": 58.476, |
|
"eval_steps_per_second": 14.619, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01817881341926954, |
|
"grad_norm": 2.182637929916382, |
|
"learning_rate": 0.0002085764548830435, |
|
"loss": 2.4546, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01983143282102132, |
|
"grad_norm": 2.9767863750457764, |
|
"learning_rate": 0.00020524128762162305, |
|
"loss": 2.252, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.021484052222773095, |
|
"grad_norm": 4.451344966888428, |
|
"learning_rate": 0.00020143724248105043, |
|
"loss": 2.1921, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02313667162452487, |
|
"grad_norm": 5.990928649902344, |
|
"learning_rate": 0.0001971828523868693, |
|
"loss": 2.0887, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02478929102627665, |
|
"grad_norm": 19.66733741760254, |
|
"learning_rate": 0.0001924988442999686, |
|
"loss": 2.5636, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02478929102627665, |
|
"eval_loss": 2.7373805046081543, |
|
"eval_runtime": 43.5877, |
|
"eval_samples_per_second": 58.457, |
|
"eval_steps_per_second": 14.614, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.026441910428028424, |
|
"grad_norm": 2.0548884868621826, |
|
"learning_rate": 0.00018740803823691298, |
|
"loss": 2.4033, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.028094529829780203, |
|
"grad_norm": 2.9234230518341064, |
|
"learning_rate": 0.00018193523609311556, |
|
"loss": 2.2948, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.029747149231531978, |
|
"grad_norm": 3.7016618251800537, |
|
"learning_rate": 0.00017610710081049675, |
|
"loss": 2.3234, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03139976863328375, |
|
"grad_norm": 5.965432643890381, |
|
"learning_rate": 0.00016995202647831142, |
|
"loss": 2.2832, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03305238803503553, |
|
"grad_norm": 17.460437774658203, |
|
"learning_rate": 0.00016350000000000002, |
|
"loss": 3.0321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03305238803503553, |
|
"eval_loss": 2.5573222637176514, |
|
"eval_runtime": 43.5955, |
|
"eval_samples_per_second": 58.446, |
|
"eval_steps_per_second": 14.612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03470500743678731, |
|
"grad_norm": 2.2317683696746826, |
|
"learning_rate": 0.00015678245500000943, |
|
"loss": 2.3917, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.03635762683853908, |
|
"grad_norm": 2.6078310012817383, |
|
"learning_rate": 0.00014983211868233444, |
|
"loss": 2.3408, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03801024624029086, |
|
"grad_norm": 4.730777263641357, |
|
"learning_rate": 0.00014268285238686927, |
|
"loss": 2.2385, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03966286564204264, |
|
"grad_norm": 5.267191410064697, |
|
"learning_rate": 0.00013536948662036378, |
|
"loss": 2.4694, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04131548504379441, |
|
"grad_norm": 14.102747917175293, |
|
"learning_rate": 0.00012792765136569544, |
|
"loss": 2.8198, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04131548504379441, |
|
"eval_loss": 2.482667922973633, |
|
"eval_runtime": 43.6594, |
|
"eval_samples_per_second": 58.361, |
|
"eval_steps_per_second": 14.59, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04296810444554619, |
|
"grad_norm": 2.223804473876953, |
|
"learning_rate": 0.00012039360249617425, |
|
"loss": 2.3776, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04462072384729797, |
|
"grad_norm": 2.835205078125, |
|
"learning_rate": 0.00011280404514057264, |
|
"loss": 2.2912, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.04627334324904974, |
|
"grad_norm": 4.878274917602539, |
|
"learning_rate": 0.00010519595485942743, |
|
"loss": 2.1857, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.04792596265080152, |
|
"grad_norm": 5.728613376617432, |
|
"learning_rate": 9.76063975038258e-05, |
|
"loss": 2.2346, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.0495785820525533, |
|
"grad_norm": 16.714305877685547, |
|
"learning_rate": 9.00723486343046e-05, |
|
"loss": 2.2206, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0495785820525533, |
|
"eval_loss": 2.4008781909942627, |
|
"eval_runtime": 43.6638, |
|
"eval_samples_per_second": 58.355, |
|
"eval_steps_per_second": 14.589, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05123120145430508, |
|
"grad_norm": 2.1384823322296143, |
|
"learning_rate": 8.263051337963623e-05, |
|
"loss": 2.2682, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05288382085605685, |
|
"grad_norm": 2.6786699295043945, |
|
"learning_rate": 7.531714761313074e-05, |
|
"loss": 2.1271, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05453644025780863, |
|
"grad_norm": 4.25484037399292, |
|
"learning_rate": 6.816788131766559e-05, |
|
"loss": 2.3849, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.056189059659560406, |
|
"grad_norm": 4.951492786407471, |
|
"learning_rate": 6.121754499999055e-05, |
|
"loss": 2.1332, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.05784167906131218, |
|
"grad_norm": 14.745146751403809, |
|
"learning_rate": 5.450000000000003e-05, |
|
"loss": 2.9364, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.05784167906131218, |
|
"eval_loss": 2.2891488075256348, |
|
"eval_runtime": 43.6713, |
|
"eval_samples_per_second": 58.345, |
|
"eval_steps_per_second": 14.586, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.059494298463063956, |
|
"grad_norm": 1.9254217147827148, |
|
"learning_rate": 4.804797352168861e-05, |
|
"loss": 2.1897, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.061146917864815735, |
|
"grad_norm": 2.3489015102386475, |
|
"learning_rate": 4.189289918950325e-05, |
|
"loss": 2.2346, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.0627995372665675, |
|
"grad_norm": 3.7030725479125977, |
|
"learning_rate": 3.606476390688449e-05, |
|
"loss": 2.2318, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06445215666831929, |
|
"grad_norm": 5.087347507476807, |
|
"learning_rate": 3.0591961763087043e-05, |
|
"loss": 2.2742, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.06610477607007106, |
|
"grad_norm": 16.04279899597168, |
|
"learning_rate": 2.550115570003141e-05, |
|
"loss": 2.5913, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06610477607007106, |
|
"eval_loss": 2.2623276710510254, |
|
"eval_runtime": 43.6913, |
|
"eval_samples_per_second": 58.318, |
|
"eval_steps_per_second": 14.58, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06775739547182284, |
|
"grad_norm": 2.179595470428467, |
|
"learning_rate": 2.081714761313074e-05, |
|
"loss": 2.2334, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.06941001487357462, |
|
"grad_norm": 2.5870578289031982, |
|
"learning_rate": 1.656275751894957e-05, |
|
"loss": 2.1487, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07106263427532639, |
|
"grad_norm": 3.1885182857513428, |
|
"learning_rate": 1.275871237837696e-05, |
|
"loss": 2.0605, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07271525367707816, |
|
"grad_norm": 5.060018539428711, |
|
"learning_rate": 9.423545116956494e-06, |
|
"loss": 2.3016, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07436787307882994, |
|
"grad_norm": 12.431950569152832, |
|
"learning_rate": 6.573504334335994e-06, |
|
"loss": 2.5269, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.07436787307882994, |
|
"eval_loss": 2.243114471435547, |
|
"eval_runtime": 43.9155, |
|
"eval_samples_per_second": 58.021, |
|
"eval_steps_per_second": 14.505, |
|
"step": 450 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.13234235359232e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|