|
{ |
|
"best_metric": 2.54923321563183e-07, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-720", |
|
"epoch": 4.848484848484849, |
|
"eval_steps": 20, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006734006734006734, |
|
"eval_loss": 0.47625598311424255, |
|
"eval_runtime": 4.3305, |
|
"eval_samples_per_second": 28.865, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13468013468013468, |
|
"eval_loss": 5.0069978897226974e-05, |
|
"eval_runtime": 4.3728, |
|
"eval_samples_per_second": 28.585, |
|
"eval_steps_per_second": 0.915, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.26936026936026936, |
|
"eval_loss": 8.045045433391351e-06, |
|
"eval_runtime": 4.3441, |
|
"eval_samples_per_second": 28.775, |
|
"eval_steps_per_second": 0.921, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.40404040404040403, |
|
"eval_loss": 3.3631840778980404e-06, |
|
"eval_runtime": 4.4814, |
|
"eval_samples_per_second": 27.893, |
|
"eval_steps_per_second": 0.893, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5387205387205387, |
|
"eval_loss": 2.012430513786967e-06, |
|
"eval_runtime": 4.4241, |
|
"eval_samples_per_second": 28.254, |
|
"eval_steps_per_second": 0.904, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6734006734006734, |
|
"grad_norm": 4.331162563175894e-05, |
|
"learning_rate": 0.0002886742611435581, |
|
"loss": 0.0221, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6734006734006734, |
|
"eval_loss": 1.4569956192644895e-06, |
|
"eval_runtime": 4.4112, |
|
"eval_samples_per_second": 28.337, |
|
"eval_steps_per_second": 0.907, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8080808080808081, |
|
"eval_loss": 1.157558699560468e-06, |
|
"eval_runtime": 4.3819, |
|
"eval_samples_per_second": 28.526, |
|
"eval_steps_per_second": 0.913, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.9427609427609428, |
|
"eval_loss": 9.559958016325254e-07, |
|
"eval_runtime": 4.3682, |
|
"eval_samples_per_second": 28.616, |
|
"eval_steps_per_second": 0.916, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0774410774410774, |
|
"eval_loss": 8.15467842585349e-07, |
|
"eval_runtime": 4.377, |
|
"eval_samples_per_second": 28.559, |
|
"eval_steps_per_second": 0.914, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.2121212121212122, |
|
"eval_loss": 7.171624929469544e-07, |
|
"eval_runtime": 4.3662, |
|
"eval_samples_per_second": 28.629, |
|
"eval_steps_per_second": 0.916, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3468013468013469, |
|
"grad_norm": 1.526053893030621e-05, |
|
"learning_rate": 0.0002522394955933096, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3468013468013469, |
|
"eval_loss": 6.367617970681749e-07, |
|
"eval_runtime": 4.4095, |
|
"eval_samples_per_second": 28.348, |
|
"eval_steps_per_second": 0.907, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4814814814814814, |
|
"eval_loss": 5.753882987846737e-07, |
|
"eval_runtime": 4.3487, |
|
"eval_samples_per_second": 28.744, |
|
"eval_steps_per_second": 0.92, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.6161616161616161, |
|
"eval_loss": 5.237209279584931e-07, |
|
"eval_runtime": 4.3453, |
|
"eval_samples_per_second": 28.767, |
|
"eval_steps_per_second": 0.921, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.7508417508417509, |
|
"eval_loss": 4.83989538224705e-07, |
|
"eval_runtime": 4.3646, |
|
"eval_samples_per_second": 28.64, |
|
"eval_steps_per_second": 0.916, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.8855218855218854, |
|
"eval_loss": 4.489497769100126e-07, |
|
"eval_runtime": 4.3601, |
|
"eval_samples_per_second": 28.669, |
|
"eval_steps_per_second": 0.917, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.0202020202020203, |
|
"grad_norm": 1.0282737093803007e-05, |
|
"learning_rate": 0.00019721004253231048, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0202020202020203, |
|
"eval_loss": 4.185091029285104e-07, |
|
"eval_runtime": 4.3862, |
|
"eval_samples_per_second": 28.499, |
|
"eval_steps_per_second": 0.912, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.154882154882155, |
|
"eval_loss": 3.925752878330968e-07, |
|
"eval_runtime": 4.3765, |
|
"eval_samples_per_second": 28.562, |
|
"eval_steps_per_second": 0.914, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.28956228956229, |
|
"eval_loss": 3.6971795225326787e-07, |
|
"eval_runtime": 4.3977, |
|
"eval_samples_per_second": 28.424, |
|
"eval_steps_per_second": 0.91, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.4242424242424243, |
|
"eval_loss": 3.503983521113696e-07, |
|
"eval_runtime": 4.387, |
|
"eval_samples_per_second": 28.493, |
|
"eval_steps_per_second": 0.912, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.558922558922559, |
|
"eval_loss": 3.3884660410876677e-07, |
|
"eval_runtime": 4.549, |
|
"eval_samples_per_second": 27.478, |
|
"eval_steps_per_second": 0.879, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.6936026936026938, |
|
"grad_norm": 7.30554893380031e-06, |
|
"learning_rate": 0.00013359431874331886, |
|
"loss": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.6936026936026938, |
|
"eval_loss": 3.222342002118239e-07, |
|
"eval_runtime": 4.5438, |
|
"eval_samples_per_second": 27.51, |
|
"eval_steps_per_second": 0.88, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.8282828282828283, |
|
"eval_loss": 3.13235887006158e-07, |
|
"eval_runtime": 4.3887, |
|
"eval_samples_per_second": 28.482, |
|
"eval_steps_per_second": 0.911, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.962962962962963, |
|
"eval_loss": 3.009457714142627e-07, |
|
"eval_runtime": 4.3984, |
|
"eval_samples_per_second": 28.419, |
|
"eval_steps_per_second": 0.909, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.0976430976430978, |
|
"eval_loss": 2.9283941671565117e-07, |
|
"eval_runtime": 4.4076, |
|
"eval_samples_per_second": 28.36, |
|
"eval_steps_per_second": 0.908, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.2323232323232323, |
|
"eval_loss": 2.863944814635033e-07, |
|
"eval_runtime": 4.3566, |
|
"eval_samples_per_second": 28.692, |
|
"eval_steps_per_second": 0.918, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.3670033670033668, |
|
"grad_norm": 6.557375400007004e-06, |
|
"learning_rate": 7.29623588569039e-05, |
|
"loss": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.3670033670033668, |
|
"eval_loss": 2.800263985136553e-07, |
|
"eval_runtime": 4.3855, |
|
"eval_samples_per_second": 28.503, |
|
"eval_steps_per_second": 0.912, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.5016835016835017, |
|
"eval_loss": 2.736429962624243e-07, |
|
"eval_runtime": 4.3648, |
|
"eval_samples_per_second": 28.638, |
|
"eval_steps_per_second": 0.916, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"eval_loss": 2.6893604854194564e-07, |
|
"eval_runtime": 4.3884, |
|
"eval_samples_per_second": 28.484, |
|
"eval_steps_per_second": 0.912, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.771043771043771, |
|
"eval_loss": 2.6898223381977004e-07, |
|
"eval_runtime": 4.4647, |
|
"eval_samples_per_second": 27.998, |
|
"eval_steps_per_second": 0.896, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.9057239057239057, |
|
"eval_loss": 2.6613662384988857e-07, |
|
"eval_runtime": 4.3975, |
|
"eval_samples_per_second": 28.425, |
|
"eval_steps_per_second": 0.91, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 4.040404040404041, |
|
"grad_norm": 6.343952009046916e-06, |
|
"learning_rate": 2.6341528967837533e-05, |
|
"loss": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.040404040404041, |
|
"eval_loss": 2.6435236577526666e-07, |
|
"eval_runtime": 4.4258, |
|
"eval_samples_per_second": 28.244, |
|
"eval_steps_per_second": 0.904, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.175084175084175, |
|
"eval_loss": 2.6299875344193424e-07, |
|
"eval_runtime": 4.3837, |
|
"eval_samples_per_second": 28.514, |
|
"eval_steps_per_second": 0.912, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.30976430976431, |
|
"eval_loss": 2.578611599801661e-07, |
|
"eval_runtime": 4.3645, |
|
"eval_samples_per_second": 28.64, |
|
"eval_steps_per_second": 0.916, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_loss": 2.583842331205233e-07, |
|
"eval_runtime": 4.3769, |
|
"eval_samples_per_second": 28.559, |
|
"eval_steps_per_second": 0.914, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.57912457912458, |
|
"eval_loss": 2.556154470312322e-07, |
|
"eval_runtime": 4.368, |
|
"eval_samples_per_second": 28.617, |
|
"eval_steps_per_second": 0.916, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.713804713804714, |
|
"grad_norm": 5.829084329889156e-06, |
|
"learning_rate": 2.2109374505137555e-06, |
|
"loss": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.713804713804714, |
|
"eval_loss": 2.5736900965966925e-07, |
|
"eval_runtime": 4.3762, |
|
"eval_samples_per_second": 28.563, |
|
"eval_steps_per_second": 0.914, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.848484848484849, |
|
"eval_loss": 2.54923321563183e-07, |
|
"eval_runtime": 4.3625, |
|
"eval_samples_per_second": 28.653, |
|
"eval_steps_per_second": 0.917, |
|
"step": 720 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 740, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 20, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.040487007898829e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|