{
  "best_metric": 2.035742998123169,
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
  "epoch": 0.1804077214504781,
  "eval_steps": 50,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00036081544290095615,
      "eval_loss": 2.921983242034912,
      "eval_runtime": 73.4498,
      "eval_samples_per_second": 15.888,
      "eval_steps_per_second": 3.976,
      "step": 1
    },
    {
      "epoch": 0.0036081544290095615,
      "grad_norm": 1.535165548324585,
      "learning_rate": 4.36e-05,
      "loss": 2.4652,
      "step": 10
    },
    {
      "epoch": 0.007216308858019123,
      "grad_norm": 1.88742196559906,
      "learning_rate": 8.72e-05,
      "loss": 2.3561,
      "step": 20
    },
    {
      "epoch": 0.010824463287028685,
      "grad_norm": 1.9067068099975586,
      "learning_rate": 0.0001308,
      "loss": 2.2395,
      "step": 30
    },
    {
      "epoch": 0.014432617716038246,
      "grad_norm": 2.238699436187744,
      "learning_rate": 0.0001744,
      "loss": 2.1678,
      "step": 40
    },
    {
      "epoch": 0.018040772145047807,
      "grad_norm": 4.385190963745117,
      "learning_rate": 0.000218,
      "loss": 2.1124,
      "step": 50
    },
    {
      "epoch": 0.018040772145047807,
      "eval_loss": 2.351407289505005,
      "eval_runtime": 73.4767,
      "eval_samples_per_second": 15.883,
      "eval_steps_per_second": 3.974,
      "step": 50
    },
    {
      "epoch": 0.02164892657405737,
      "grad_norm": 1.5297818183898926,
      "learning_rate": 0.00021773448147832086,
      "loss": 2.0854,
      "step": 60
    },
    {
      "epoch": 0.025257081003066933,
      "grad_norm": 1.3838398456573486,
      "learning_rate": 0.0002169392194928312,
      "loss": 2.0998,
      "step": 70
    },
    {
      "epoch": 0.028865235432076492,
      "grad_norm": 2.074045181274414,
      "learning_rate": 0.00021561808847998484,
      "loss": 2.2113,
      "step": 80
    },
    {
      "epoch": 0.032473389861086055,
      "grad_norm": 2.1741549968719482,
      "learning_rate": 0.00021377752485727676,
      "loss": 2.1485,
      "step": 90
    },
    {
      "epoch": 0.036081544290095614,
      "grad_norm": 6.557363986968994,
      "learning_rate": 0.00021142649566566402,
      "loss": 2.1276,
      "step": 100
    },
    {
      "epoch": 0.036081544290095614,
      "eval_loss": 2.3574182987213135,
      "eval_runtime": 73.3643,
      "eval_samples_per_second": 15.907,
      "eval_steps_per_second": 3.98,
      "step": 100
    },
    {
      "epoch": 0.03968969871910518,
      "grad_norm": 1.5597459077835083,
      "learning_rate": 0.0002085764548830435,
      "loss": 2.2556,
      "step": 110
    },
    {
      "epoch": 0.04329785314811474,
      "grad_norm": 1.775914192199707,
      "learning_rate": 0.00020524128762162305,
      "loss": 2.3104,
      "step": 120
    },
    {
      "epoch": 0.0469060075771243,
      "grad_norm": 1.6727547645568848,
      "learning_rate": 0.00020143724248105043,
      "loss": 2.1549,
      "step": 130
    },
    {
      "epoch": 0.050514162006133866,
      "grad_norm": 2.4896557331085205,
      "learning_rate": 0.0001971828523868693,
      "loss": 2.1205,
      "step": 140
    },
    {
      "epoch": 0.054122316435143425,
      "grad_norm": 4.250438213348389,
      "learning_rate": 0.0001924988442999686,
      "loss": 2.0478,
      "step": 150
    },
    {
      "epoch": 0.054122316435143425,
      "eval_loss": 2.2884583473205566,
      "eval_runtime": 73.1329,
      "eval_samples_per_second": 15.957,
      "eval_steps_per_second": 3.993,
      "step": 150
    },
    {
      "epoch": 0.057730470864152984,
      "grad_norm": 1.366658329963684,
      "learning_rate": 0.00018740803823691298,
      "loss": 2.172,
      "step": 160
    },
    {
      "epoch": 0.06133862529316255,
      "grad_norm": 1.5560624599456787,
      "learning_rate": 0.00018193523609311556,
      "loss": 2.3027,
      "step": 170
    },
    {
      "epoch": 0.06494677972217211,
      "grad_norm": 1.9186638593673706,
      "learning_rate": 0.00017610710081049675,
      "loss": 2.1255,
      "step": 180
    },
    {
      "epoch": 0.06855493415118168,
      "grad_norm": 2.413241386413574,
      "learning_rate": 0.00016995202647831142,
      "loss": 2.1701,
      "step": 190
    },
    {
      "epoch": 0.07216308858019123,
      "grad_norm": 4.694942474365234,
      "learning_rate": 0.00016350000000000002,
      "loss": 2.0427,
      "step": 200
    },
    {
      "epoch": 0.07216308858019123,
      "eval_loss": 2.2128450870513916,
      "eval_runtime": 73.3767,
      "eval_samples_per_second": 15.904,
      "eval_steps_per_second": 3.979,
      "step": 200
    },
    {
      "epoch": 0.0757712430092008,
      "grad_norm": 1.2760608196258545,
      "learning_rate": 0.00015678245500000943,
      "loss": 2.2211,
      "step": 210
    },
    {
      "epoch": 0.07937939743821036,
      "grad_norm": 1.7979975938796997,
      "learning_rate": 0.00014983211868233444,
      "loss": 2.3215,
      "step": 220
    },
    {
      "epoch": 0.08298755186721991,
      "grad_norm": 2.1475672721862793,
      "learning_rate": 0.00014268285238686927,
      "loss": 2.1729,
      "step": 230
    },
    {
      "epoch": 0.08659570629622948,
      "grad_norm": 2.0573699474334717,
      "learning_rate": 0.00013536948662036378,
      "loss": 2.0533,
      "step": 240
    },
    {
      "epoch": 0.09020386072523905,
      "grad_norm": 5.545847415924072,
      "learning_rate": 0.00012792765136569544,
      "loss": 2.0229,
      "step": 250
    },
    {
      "epoch": 0.09020386072523905,
      "eval_loss": 2.196916341781616,
      "eval_runtime": 73.0911,
      "eval_samples_per_second": 15.966,
      "eval_steps_per_second": 3.995,
      "step": 250
    },
    {
      "epoch": 0.0938120151542486,
      "grad_norm": 1.4615882635116577,
      "learning_rate": 0.00012039360249617425,
      "loss": 2.2424,
      "step": 260
    },
    {
      "epoch": 0.09742016958325816,
      "grad_norm": 1.7726435661315918,
      "learning_rate": 0.00011280404514057264,
      "loss": 2.147,
      "step": 270
    },
    {
      "epoch": 0.10102832401226773,
      "grad_norm": 1.9420199394226074,
      "learning_rate": 0.00010519595485942743,
      "loss": 2.157,
      "step": 280
    },
    {
      "epoch": 0.10463647844127728,
      "grad_norm": 2.1413962841033936,
      "learning_rate": 9.76063975038258e-05,
      "loss": 1.9894,
      "step": 290
    },
    {
      "epoch": 0.10824463287028685,
      "grad_norm": 5.385281562805176,
      "learning_rate": 9.00723486343046e-05,
      "loss": 2.0386,
      "step": 300
    },
    {
      "epoch": 0.10824463287028685,
      "eval_loss": 2.1419291496276855,
      "eval_runtime": 73.4196,
      "eval_samples_per_second": 15.895,
      "eval_steps_per_second": 3.977,
      "step": 300
    },
    {
      "epoch": 0.11185278729929642,
      "grad_norm": 1.4185280799865723,
      "learning_rate": 8.263051337963623e-05,
      "loss": 2.2137,
      "step": 310
    },
    {
      "epoch": 0.11546094172830597,
      "grad_norm": 1.4617102146148682,
      "learning_rate": 7.531714761313074e-05,
      "loss": 2.1878,
      "step": 320
    },
    {
      "epoch": 0.11906909615731553,
      "grad_norm": 1.7664028406143188,
      "learning_rate": 6.816788131766559e-05,
      "loss": 2.0554,
      "step": 330
    },
    {
      "epoch": 0.1226772505863251,
      "grad_norm": 2.4227097034454346,
      "learning_rate": 6.121754499999055e-05,
      "loss": 2.0241,
      "step": 340
    },
    {
      "epoch": 0.12628540501533467,
      "grad_norm": 4.657759189605713,
      "learning_rate": 5.450000000000003e-05,
      "loss": 2.0051,
      "step": 350
    },
    {
      "epoch": 0.12628540501533467,
      "eval_loss": 2.0811328887939453,
      "eval_runtime": 73.35,
      "eval_samples_per_second": 15.91,
      "eval_steps_per_second": 3.981,
      "step": 350
    },
    {
      "epoch": 0.12989355944434422,
      "grad_norm": 1.0652941465377808,
      "learning_rate": 4.804797352168861e-05,
      "loss": 2.1246,
      "step": 360
    },
    {
      "epoch": 0.13350171387335377,
      "grad_norm": 1.3440688848495483,
      "learning_rate": 4.189289918950325e-05,
      "loss": 2.0742,
      "step": 370
    },
    {
      "epoch": 0.13710986830236335,
      "grad_norm": 1.5741288661956787,
      "learning_rate": 3.606476390688449e-05,
      "loss": 2.0424,
      "step": 380
    },
    {
      "epoch": 0.1407180227313729,
      "grad_norm": 1.601135492324829,
      "learning_rate": 3.0591961763087043e-05,
      "loss": 2.0331,
      "step": 390
    },
    {
      "epoch": 0.14432617716038246,
      "grad_norm": 4.815340518951416,
      "learning_rate": 2.550115570003141e-05,
      "loss": 2.0085,
      "step": 400
    },
    {
      "epoch": 0.14432617716038246,
      "eval_loss": 2.0443766117095947,
      "eval_runtime": 73.424,
      "eval_samples_per_second": 15.894,
      "eval_steps_per_second": 3.977,
      "step": 400
    },
    {
      "epoch": 0.14793433158939204,
      "grad_norm": 1.3404114246368408,
      "learning_rate": 2.081714761313074e-05,
      "loss": 2.2207,
      "step": 410
    },
    {
      "epoch": 0.1515424860184016,
      "grad_norm": 1.5793532133102417,
      "learning_rate": 1.656275751894957e-05,
      "loss": 2.0376,
      "step": 420
    },
    {
      "epoch": 0.15515064044741114,
      "grad_norm": 1.5471128225326538,
      "learning_rate": 1.275871237837696e-05,
      "loss": 2.1071,
      "step": 430
    },
    {
      "epoch": 0.15875879487642072,
      "grad_norm": 2.025006055831909,
      "learning_rate": 9.423545116956494e-06,
      "loss": 1.9576,
      "step": 440
    },
    {
      "epoch": 0.16236694930543027,
      "grad_norm": 3.5461795330047607,
      "learning_rate": 6.573504334335994e-06,
      "loss": 1.9234,
      "step": 450
    },
    {
      "epoch": 0.16236694930543027,
      "eval_loss": 2.035742998123169,
      "eval_runtime": 73.4959,
      "eval_samples_per_second": 15.878,
      "eval_steps_per_second": 3.973,
      "step": 450
    },
    {
      "epoch": 0.16597510373443983,
      "grad_norm": 1.0669924020767212,
      "learning_rate": 4.22247514272324e-06,
      "loss": 2.0513,
      "step": 460
    },
    {
      "epoch": 0.1695832581634494,
      "grad_norm": 1.5129503011703491,
      "learning_rate": 2.38191152001518e-06,
      "loss": 2.0959,
      "step": 470
    },
    {
      "epoch": 0.17319141259245896,
      "grad_norm": 1.5468735694885254,
      "learning_rate": 1.0607805071688306e-06,
      "loss": 2.1127,
      "step": 480
    },
    {
      "epoch": 0.1767995670214685,
      "grad_norm": 1.8866146802902222,
      "learning_rate": 2.655185216791625e-07,
      "loss": 2.0466,
      "step": 490
    },
    {
      "epoch": 0.1804077214504781,
      "grad_norm": 4.180663108825684,
      "learning_rate": 0.0,
      "loss": 1.813,
      "step": 500
    },
    {
      "epoch": 0.1804077214504781,
      "eval_loss": 2.0386555194854736,
      "eval_runtime": 73.3404,
      "eval_samples_per_second": 15.912,
      "eval_steps_per_second": 3.981,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.72048437304361e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}