lesso02's picture
Training in progress, step 450, checkpoint
0067457 verified
raw
history blame
11 kB
{
"best_metric": 1.4701261520385742,
"best_model_checkpoint": "miner_id_24/checkpoint-450",
"epoch": 0.07600067556156055,
"eval_steps": 50,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00016889039013680122,
"eval_loss": 2.846999168395996,
"eval_runtime": 220.8139,
"eval_samples_per_second": 11.295,
"eval_steps_per_second": 2.826,
"step": 1
},
{
"epoch": 0.0016889039013680122,
"grad_norm": 3.695821523666382,
"learning_rate": 4.0400000000000006e-05,
"loss": 4.3694,
"step": 10
},
{
"epoch": 0.0033778078027360244,
"grad_norm": 6.247311592102051,
"learning_rate": 8.080000000000001e-05,
"loss": 3.9724,
"step": 20
},
{
"epoch": 0.005066711704104036,
"grad_norm": 5.978872299194336,
"learning_rate": 0.00012119999999999999,
"loss": 3.5361,
"step": 30
},
{
"epoch": 0.006755615605472049,
"grad_norm": 5.326415538787842,
"learning_rate": 0.00016160000000000002,
"loss": 3.546,
"step": 40
},
{
"epoch": 0.00844451950684006,
"grad_norm": 9.239958763122559,
"learning_rate": 0.000202,
"loss": 3.7854,
"step": 50
},
{
"epoch": 0.00844451950684006,
"eval_loss": 1.693381667137146,
"eval_runtime": 220.7239,
"eval_samples_per_second": 11.299,
"eval_steps_per_second": 2.827,
"step": 50
},
{
"epoch": 0.010133423408208072,
"grad_norm": 7.550833225250244,
"learning_rate": 0.00020175396907624226,
"loss": 2.9515,
"step": 60
},
{
"epoch": 0.011822327309576086,
"grad_norm": 3.227522134780884,
"learning_rate": 0.0002010170749428986,
"loss": 3.2175,
"step": 70
},
{
"epoch": 0.013511231210944098,
"grad_norm": 4.046976566314697,
"learning_rate": 0.00019979290767411438,
"loss": 3.5135,
"step": 80
},
{
"epoch": 0.01520013511231211,
"grad_norm": 8.03056812286377,
"learning_rate": 0.0001980874312897702,
"loss": 3.6262,
"step": 90
},
{
"epoch": 0.01688903901368012,
"grad_norm": 8.536865234375,
"learning_rate": 0.00019590895469937675,
"loss": 4.0803,
"step": 100
},
{
"epoch": 0.01688903901368012,
"eval_loss": 1.7287895679473877,
"eval_runtime": 221.2619,
"eval_samples_per_second": 11.272,
"eval_steps_per_second": 2.82,
"step": 100
},
{
"epoch": 0.018577942915048135,
"grad_norm": 3.2818431854248047,
"learning_rate": 0.0001932680912219027,
"loss": 2.962,
"step": 110
},
{
"epoch": 0.020266846816416145,
"grad_norm": 3.375999689102173,
"learning_rate": 0.00019017770687875164,
"loss": 3.0989,
"step": 120
},
{
"epoch": 0.021955750717784158,
"grad_norm": 3.638335943222046,
"learning_rate": 0.000186652857711799,
"loss": 3.3325,
"step": 130
},
{
"epoch": 0.02364465461915217,
"grad_norm": 6.7433762550354,
"learning_rate": 0.00018271071643186968,
"loss": 3.8392,
"step": 140
},
{
"epoch": 0.02533355852052018,
"grad_norm": 6.1501593589782715,
"learning_rate": 0.00017837048875501678,
"loss": 3.8591,
"step": 150
},
{
"epoch": 0.02533355852052018,
"eval_loss": 1.6560685634613037,
"eval_runtime": 221.9305,
"eval_samples_per_second": 11.238,
"eval_steps_per_second": 2.812,
"step": 150
},
{
"epoch": 0.027022462421888195,
"grad_norm": 2.800673246383667,
"learning_rate": 0.00017365331983420376,
"loss": 2.7496,
"step": 160
},
{
"epoch": 0.028711366323256205,
"grad_norm": 2.549720525741577,
"learning_rate": 0.0001685821912422447,
"loss": 3.2596,
"step": 170
},
{
"epoch": 0.03040027022462422,
"grad_norm": 4.524278163909912,
"learning_rate": 0.00016318180900789148,
"loss": 3.5261,
"step": 180
},
{
"epoch": 0.03208917412599223,
"grad_norm": 5.028242588043213,
"learning_rate": 0.00015747848325054544,
"loss": 3.4352,
"step": 190
},
{
"epoch": 0.03377807802736024,
"grad_norm": 6.687849044799805,
"learning_rate": 0.0001515,
"loss": 3.7396,
"step": 200
},
{
"epoch": 0.03377807802736024,
"eval_loss": 1.6160459518432617,
"eval_runtime": 221.3916,
"eval_samples_per_second": 11.265,
"eval_steps_per_second": 2.819,
"step": 200
},
{
"epoch": 0.035466981928728256,
"grad_norm": 2.702817440032959,
"learning_rate": 0.00014527548582569683,
"loss": 2.6468,
"step": 210
},
{
"epoch": 0.03715588583009627,
"grad_norm": 4.0655436515808105,
"learning_rate": 0.00013883526593500714,
"loss": 3.2802,
"step": 220
},
{
"epoch": 0.03884478973146428,
"grad_norm": 3.6759231090545654,
"learning_rate": 0.0001322107164318697,
"loss": 3.3334,
"step": 230
},
{
"epoch": 0.04053369363283229,
"grad_norm": 4.863379001617432,
"learning_rate": 0.00012543411145556643,
"loss": 3.2032,
"step": 240
},
{
"epoch": 0.0422225975342003,
"grad_norm": 6.288718223571777,
"learning_rate": 0.00011853846594435998,
"loss": 3.5356,
"step": 250
},
{
"epoch": 0.0422225975342003,
"eval_loss": 1.5846458673477173,
"eval_runtime": 221.1464,
"eval_samples_per_second": 11.278,
"eval_steps_per_second": 2.822,
"step": 250
},
{
"epoch": 0.043911501435568316,
"grad_norm": 3.2017464637756348,
"learning_rate": 0.00011155737479003301,
"loss": 2.8565,
"step": 260
},
{
"epoch": 0.04560040533693633,
"grad_norm": 3.160783529281616,
"learning_rate": 0.00010452484916695262,
"loss": 3.1377,
"step": 270
},
{
"epoch": 0.04728930923830434,
"grad_norm": 4.057458877563477,
"learning_rate": 9.747515083304742e-05,
"loss": 3.2321,
"step": 280
},
{
"epoch": 0.04897821313967235,
"grad_norm": 3.2311322689056396,
"learning_rate": 9.044262520996702e-05,
"loss": 3.1823,
"step": 290
},
{
"epoch": 0.05066711704104036,
"grad_norm": 6.725953578948975,
"learning_rate": 8.346153405564004e-05,
"loss": 3.3379,
"step": 300
},
{
"epoch": 0.05066711704104036,
"eval_loss": 1.5483067035675049,
"eval_runtime": 221.1074,
"eval_samples_per_second": 11.28,
"eval_steps_per_second": 2.822,
"step": 300
},
{
"epoch": 0.05235602094240838,
"grad_norm": 2.5730559825897217,
"learning_rate": 7.656588854443357e-05,
"loss": 2.8366,
"step": 310
},
{
"epoch": 0.05404492484377639,
"grad_norm": 2.557634115219116,
"learning_rate": 6.978928356813031e-05,
"loss": 2.963,
"step": 320
},
{
"epoch": 0.055733828745144404,
"grad_norm": 3.629833221435547,
"learning_rate": 6.316473406499288e-05,
"loss": 2.9596,
"step": 330
},
{
"epoch": 0.05742273264651241,
"grad_norm": 3.6670989990234375,
"learning_rate": 5.672451417430317e-05,
"loss": 3.2497,
"step": 340
},
{
"epoch": 0.059111636547880424,
"grad_norm": 4.485354900360107,
"learning_rate": 5.050000000000002e-05,
"loss": 3.1528,
"step": 350
},
{
"epoch": 0.059111636547880424,
"eval_loss": 1.5068892240524292,
"eval_runtime": 221.0414,
"eval_samples_per_second": 11.283,
"eval_steps_per_second": 2.823,
"step": 350
},
{
"epoch": 0.06080054044924844,
"grad_norm": 2.8697142601013184,
"learning_rate": 4.452151674945458e-05,
"loss": 2.7241,
"step": 360
},
{
"epoch": 0.06248944435061645,
"grad_norm": 4.0349016189575195,
"learning_rate": 3.8818190992108515e-05,
"loss": 2.9007,
"step": 370
},
{
"epoch": 0.06417834825198446,
"grad_norm": 3.124457359313965,
"learning_rate": 3.3417808757755355e-05,
"loss": 3.0556,
"step": 380
},
{
"epoch": 0.06586725215335247,
"grad_norm": 3.4859092235565186,
"learning_rate": 2.8346680165796253e-05,
"loss": 3.115,
"step": 390
},
{
"epoch": 0.06755615605472048,
"grad_norm": 4.689525127410889,
"learning_rate": 2.362951124498323e-05,
"loss": 3.1429,
"step": 400
},
{
"epoch": 0.06755615605472048,
"eval_loss": 1.4789899587631226,
"eval_runtime": 225.1952,
"eval_samples_per_second": 11.075,
"eval_steps_per_second": 2.771,
"step": 400
},
{
"epoch": 0.0692450599560885,
"grad_norm": 2.2446751594543457,
"learning_rate": 1.928928356813032e-05,
"loss": 2.6135,
"step": 410
},
{
"epoch": 0.07093396385745651,
"grad_norm": 2.9616000652313232,
"learning_rate": 1.5347142288200977e-05,
"loss": 2.8901,
"step": 420
},
{
"epoch": 0.07262286775882452,
"grad_norm": 2.9250693321228027,
"learning_rate": 1.1822293121248375e-05,
"loss": 2.8757,
"step": 430
},
{
"epoch": 0.07431177166019254,
"grad_norm": 2.9157168865203857,
"learning_rate": 8.731908778097302e-06,
"loss": 3.2046,
"step": 440
},
{
"epoch": 0.07600067556156055,
"grad_norm": 5.299656867980957,
"learning_rate": 6.09104530062326e-06,
"loss": 3.061,
"step": 450
},
{
"epoch": 0.07600067556156055,
"eval_loss": 1.4701261520385742,
"eval_runtime": 221.003,
"eval_samples_per_second": 11.285,
"eval_steps_per_second": 2.823,
"step": 450
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.44902656622592e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}