|
{ |
|
"best_metric": 0.7788119912147522, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-250", |
|
"epoch": 0.29420417769932333, |
|
"eval_steps": 50, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011768167107972932, |
|
"eval_loss": 1.4336028099060059, |
|
"eval_runtime": 63.7044, |
|
"eval_samples_per_second": 22.479, |
|
"eval_steps_per_second": 5.62, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011768167107972934, |
|
"grad_norm": 1.2304390668869019, |
|
"learning_rate": 0.0002, |
|
"loss": 7.4146, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.023536334215945868, |
|
"grad_norm": 0.6472563743591309, |
|
"learning_rate": 0.0001998582695676762, |
|
"loss": 6.0483, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0353045013239188, |
|
"grad_norm": 0.7562745213508606, |
|
"learning_rate": 0.00019943348002101371, |
|
"loss": 3.9027, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.047072668431891736, |
|
"grad_norm": 0.8033158779144287, |
|
"learning_rate": 0.00019872683547213446, |
|
"loss": 1.8585, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05884083553986467, |
|
"grad_norm": 1.7948169708251953, |
|
"learning_rate": 0.00019774033898178667, |
|
"loss": 1.7989, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05884083553986467, |
|
"eval_loss": 0.9343328475952148, |
|
"eval_runtime": 64.9646, |
|
"eval_samples_per_second": 22.043, |
|
"eval_steps_per_second": 5.511, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0706090026478376, |
|
"grad_norm": 0.8677065968513489, |
|
"learning_rate": 0.0001964767868814516, |
|
"loss": 6.2807, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08237716975581054, |
|
"grad_norm": 0.7287414073944092, |
|
"learning_rate": 0.00019493976084683813, |
|
"loss": 4.9082, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09414533686378347, |
|
"grad_norm": 1.209147572517395, |
|
"learning_rate": 0.00019313361774523385, |
|
"loss": 3.0766, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1059135039717564, |
|
"grad_norm": 0.9230589866638184, |
|
"learning_rate": 0.00019106347728549135, |
|
"loss": 1.5117, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11768167107972934, |
|
"grad_norm": 2.2932982444763184, |
|
"learning_rate": 0.00018873520750565718, |
|
"loss": 1.5895, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11768167107972934, |
|
"eval_loss": 0.8446639180183411, |
|
"eval_runtime": 64.7346, |
|
"eval_samples_per_second": 22.121, |
|
"eval_steps_per_second": 5.53, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12944983818770225, |
|
"grad_norm": 0.867832362651825, |
|
"learning_rate": 0.0001861554081393806, |
|
"loss": 6.0238, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1412180052956752, |
|
"grad_norm": 0.8156207203865051, |
|
"learning_rate": 0.0001833313919082515, |
|
"loss": 4.7902, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.15298617240364812, |
|
"grad_norm": 0.802191436290741, |
|
"learning_rate": 0.00018027116379309638, |
|
"loss": 2.8416, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16475433951162108, |
|
"grad_norm": 1.274532675743103, |
|
"learning_rate": 0.00017698339834299061, |
|
"loss": 1.3511, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.176522506619594, |
|
"grad_norm": 1.8214877843856812, |
|
"learning_rate": 0.00017347741508630672, |
|
"loss": 1.4454, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.176522506619594, |
|
"eval_loss": 0.8176446557044983, |
|
"eval_runtime": 64.9157, |
|
"eval_samples_per_second": 22.059, |
|
"eval_steps_per_second": 5.515, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18829067372756694, |
|
"grad_norm": 0.7749770283699036, |
|
"learning_rate": 0.0001697631521134985, |
|
"loss": 5.9457, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.20005884083553988, |
|
"grad_norm": 0.843367338180542, |
|
"learning_rate": 0.00016585113790650388, |
|
"loss": 4.737, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2118270079435128, |
|
"grad_norm": 0.7175513505935669, |
|
"learning_rate": 0.0001617524614946192, |
|
"loss": 2.8268, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.22359517505148574, |
|
"grad_norm": 0.8358870148658752, |
|
"learning_rate": 0.0001574787410214407, |
|
"loss": 1.3233, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.23536334215945867, |
|
"grad_norm": 2.047827959060669, |
|
"learning_rate": 0.00015304209081197425, |
|
"loss": 1.4239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23536334215945867, |
|
"eval_loss": 0.7979318499565125, |
|
"eval_runtime": 64.8822, |
|
"eval_samples_per_second": 22.071, |
|
"eval_steps_per_second": 5.518, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2471315092674316, |
|
"grad_norm": 0.7978292107582092, |
|
"learning_rate": 0.00014845508703326504, |
|
"loss": 5.7924, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.2588996763754045, |
|
"grad_norm": 0.9069154858589172, |
|
"learning_rate": 0.00014373073204588556, |
|
"loss": 4.7768, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.27066784348337747, |
|
"grad_norm": 0.7871260643005371, |
|
"learning_rate": 0.00013888241754733208, |
|
"loss": 2.6733, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2824360105913504, |
|
"grad_norm": 0.7734578847885132, |
|
"learning_rate": 0.00013392388661180303, |
|
"loss": 1.2857, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.29420417769932333, |
|
"grad_norm": 1.4109017848968506, |
|
"learning_rate": 0.0001288691947339621, |
|
"loss": 1.3427, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29420417769932333, |
|
"eval_loss": 0.7788119912147522, |
|
"eval_runtime": 65.0714, |
|
"eval_samples_per_second": 22.007, |
|
"eval_steps_per_second": 5.502, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6845926844019507e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|