{
  "best_metric": 1.041056513786316,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 3.0273224043715845,
  "eval_steps": 50,
  "global_step": 137,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02185792349726776,
      "grad_norm": 0.054219260811805725,
      "learning_rate": 1.16e-05,
      "loss": 1.0221,
      "step": 1
    },
    {
      "epoch": 0.02185792349726776,
      "eval_loss": 1.3064125776290894,
      "eval_runtime": 1.5023,
      "eval_samples_per_second": 409.374,
      "eval_steps_per_second": 13.313,
      "step": 1
    },
    {
      "epoch": 0.04371584699453552,
      "grad_norm": 0.07374625653028488,
      "learning_rate": 2.32e-05,
      "loss": 1.13,
      "step": 2
    },
    {
      "epoch": 0.06557377049180328,
      "grad_norm": 0.08744122087955475,
      "learning_rate": 3.48e-05,
      "loss": 1.2471,
      "step": 3
    },
    {
      "epoch": 0.08743169398907104,
      "grad_norm": 0.1099563017487526,
      "learning_rate": 4.64e-05,
      "loss": 1.3512,
      "step": 4
    },
    {
      "epoch": 0.1092896174863388,
      "grad_norm": 0.14091312885284424,
      "learning_rate": 5.8e-05,
      "loss": 1.382,
      "step": 5
    },
    {
      "epoch": 0.13114754098360656,
      "grad_norm": 0.19244275987148285,
      "learning_rate": 6.96e-05,
      "loss": 1.524,
      "step": 6
    },
    {
      "epoch": 0.15300546448087432,
      "grad_norm": 0.052936580032110214,
      "learning_rate": 8.12e-05,
      "loss": 1.0329,
      "step": 7
    },
    {
      "epoch": 0.17486338797814208,
      "grad_norm": 0.06494678556919098,
      "learning_rate": 9.28e-05,
      "loss": 1.1503,
      "step": 8
    },
    {
      "epoch": 0.19672131147540983,
      "grad_norm": 0.07551469653844833,
      "learning_rate": 0.0001044,
      "loss": 1.2085,
      "step": 9
    },
    {
      "epoch": 0.2185792349726776,
      "grad_norm": 0.08664041757583618,
      "learning_rate": 0.000116,
      "loss": 1.2444,
      "step": 10
    },
    {
      "epoch": 0.24043715846994534,
      "grad_norm": 0.10655322670936584,
      "learning_rate": 0.00011598225532067881,
      "loss": 1.3136,
      "step": 11
    },
    {
      "epoch": 0.26229508196721313,
      "grad_norm": 0.14484980702400208,
      "learning_rate": 0.00011592903214042715,
      "loss": 1.3774,
      "step": 12
    },
    {
      "epoch": 0.28415300546448086,
      "grad_norm": 0.049404121935367584,
      "learning_rate": 0.00011584036302573693,
      "loss": 0.9998,
      "step": 13
    },
    {
      "epoch": 0.30601092896174864,
      "grad_norm": 0.05533352494239807,
      "learning_rate": 0.0001157163022319532,
      "loss": 1.077,
      "step": 14
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 0.06618451327085495,
      "learning_rate": 0.00011555692567007598,
      "loss": 1.1209,
      "step": 15
    },
    {
      "epoch": 0.34972677595628415,
      "grad_norm": 0.07199019938707352,
      "learning_rate": 0.00011536233086031157,
      "loss": 1.2181,
      "step": 16
    },
    {
      "epoch": 0.37158469945355194,
      "grad_norm": 0.08229127526283264,
      "learning_rate": 0.00011513263687240126,
      "loss": 1.2544,
      "step": 17
    },
    {
      "epoch": 0.39344262295081966,
      "grad_norm": 0.10118231177330017,
      "learning_rate": 0.00011486798425276428,
      "loss": 1.3167,
      "step": 18
    },
    {
      "epoch": 0.41530054644808745,
      "grad_norm": 0.06382325291633606,
      "learning_rate": 0.00011456853493849944,
      "loss": 0.9757,
      "step": 19
    },
    {
      "epoch": 0.4371584699453552,
      "grad_norm": 0.06287430226802826,
      "learning_rate": 0.0001142344721582983,
      "loss": 1.0141,
      "step": 20
    },
    {
      "epoch": 0.45901639344262296,
      "grad_norm": 0.061046287417411804,
      "learning_rate": 0.00011386600032033012,
      "loss": 1.1142,
      "step": 21
    },
    {
      "epoch": 0.4808743169398907,
      "grad_norm": 0.05975975841283798,
      "learning_rate": 0.0001134633448871674,
      "loss": 1.172,
      "step": 22
    },
    {
      "epoch": 0.5027322404371585,
      "grad_norm": 0.06590148061513901,
      "learning_rate": 0.00011302675223782873,
      "loss": 1.1934,
      "step": 23
    },
    {
      "epoch": 0.5245901639344263,
      "grad_norm": 0.07652608305215836,
      "learning_rate": 0.00011255648951702296,
      "loss": 1.2285,
      "step": 24
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 0.11880210041999817,
      "learning_rate": 0.0001120528444716872,
      "loss": 1.2294,
      "step": 25
    },
    {
      "epoch": 0.5683060109289617,
      "grad_norm": 0.04327382519841194,
      "learning_rate": 0.00011151612527491878,
      "loss": 0.9457,
      "step": 26
    },
    {
      "epoch": 0.5901639344262295,
      "grad_norm": 0.05113707482814789,
      "learning_rate": 0.00011094666033740846,
      "loss": 1.0301,
      "step": 27
    },
    {
      "epoch": 0.6120218579234973,
      "grad_norm": 0.04633456468582153,
      "learning_rate": 0.00011034479810649071,
      "loss": 1.1369,
      "step": 28
    },
    {
      "epoch": 0.6338797814207651,
      "grad_norm": 0.052176687866449356,
      "learning_rate": 0.00010971090685293396,
      "loss": 1.1575,
      "step": 29
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 0.05911482125520706,
      "learning_rate": 0.00010904537444560093,
      "loss": 1.1915,
      "step": 30
    },
    {
      "epoch": 0.6775956284153005,
      "grad_norm": 0.08560285717248917,
      "learning_rate": 0.0001083486081141173,
      "loss": 1.1844,
      "step": 31
    },
    {
      "epoch": 0.6994535519125683,
      "grad_norm": 0.0443929098546505,
      "learning_rate": 0.00010762103419969393,
      "loss": 0.9784,
      "step": 32
    },
    {
      "epoch": 0.7213114754098361,
      "grad_norm": 0.04982827231287956,
      "learning_rate": 0.00010686309789425474,
      "loss": 1.0368,
      "step": 33
    },
    {
      "epoch": 0.7431693989071039,
      "grad_norm": 0.04613876715302467,
      "learning_rate": 0.00010607526296803026,
      "loss": 1.0534,
      "step": 34
    },
    {
      "epoch": 0.7650273224043715,
      "grad_norm": 0.04624936357140541,
      "learning_rate": 0.00010525801148578341,
      "loss": 1.1136,
      "step": 35
    },
    {
      "epoch": 0.7868852459016393,
      "grad_norm": 0.050727903842926025,
      "learning_rate": 0.000104411843511841,
      "loss": 1.1563,
      "step": 36
    },
    {
      "epoch": 0.8087431693989071,
      "grad_norm": 0.07218360155820847,
      "learning_rate": 0.00010353727680411158,
      "loss": 1.148,
      "step": 37
    },
    {
      "epoch": 0.8306010928961749,
      "grad_norm": 0.04049117863178253,
      "learning_rate": 0.00010263484649727705,
      "loss": 0.9096,
      "step": 38
    },
    {
      "epoch": 0.8524590163934426,
      "grad_norm": 0.0455789640545845,
      "learning_rate": 0.00010170510477535133,
      "loss": 1.0006,
      "step": 39
    },
    {
      "epoch": 0.8743169398907104,
      "grad_norm": 0.039463143795728683,
      "learning_rate": 0.00010074862053380711,
      "loss": 1.0411,
      "step": 40
    },
    {
      "epoch": 0.8961748633879781,
      "grad_norm": 0.042614974081516266,
      "learning_rate": 9.976597903147682e-05,
      "loss": 1.1396,
      "step": 41
    },
    {
      "epoch": 0.9180327868852459,
      "grad_norm": 0.04930881783366203,
      "learning_rate": 9.875778153244143e-05,
      "loss": 1.1744,
      "step": 42
    },
    {
      "epoch": 0.9398907103825137,
      "grad_norm": 0.06974472105503082,
      "learning_rate": 9.772464493812549e-05,
      "loss": 1.15,
      "step": 43
    },
    {
      "epoch": 0.9617486338797814,
      "grad_norm": 0.04092060774564743,
      "learning_rate": 9.66672014098242e-05,
      "loss": 0.9676,
      "step": 44
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 0.0392816998064518,
      "learning_rate": 9.558609798189311e-05,
      "loss": 1.0893,
      "step": 45
    },
    {
      "epoch": 1.0163934426229508,
      "grad_norm": 0.08897832781076431,
      "learning_rate": 9.448199616583707e-05,
      "loss": 1.8898,
      "step": 46
    },
    {
      "epoch": 1.0382513661202186,
      "grad_norm": 0.03982605040073395,
      "learning_rate": 9.335557154554105e-05,
      "loss": 0.9943,
      "step": 47
    },
    {
      "epoch": 1.0601092896174864,
      "grad_norm": 0.03858646750450134,
      "learning_rate": 9.220751336389013e-05,
      "loss": 1.0459,
      "step": 48
    },
    {
      "epoch": 1.0819672131147542,
      "grad_norm": 0.040587618947029114,
      "learning_rate": 9.10385241010317e-05,
      "loss": 1.1494,
      "step": 49
    },
    {
      "epoch": 1.1038251366120218,
      "grad_norm": 0.052482884377241135,
      "learning_rate": 8.984931904453821e-05,
      "loss": 1.1475,
      "step": 50
    },
    {
      "epoch": 1.1038251366120218,
      "eval_loss": 1.0756638050079346,
      "eval_runtime": 1.9721,
      "eval_samples_per_second": 311.855,
      "eval_steps_per_second": 10.142,
      "step": 50
    },
    {
      "epoch": 1.1256830601092895,
      "grad_norm": 0.07459885627031326,
      "learning_rate": 8.864062585173286e-05,
      "loss": 1.1567,
      "step": 51
    },
    {
      "epoch": 1.1475409836065573,
      "grad_norm": 0.03736037015914917,
      "learning_rate": 8.741318410444684e-05,
      "loss": 0.9095,
      "step": 52
    },
    {
      "epoch": 1.169398907103825,
      "grad_norm": 0.04274572804570198,
      "learning_rate": 8.616774485647986e-05,
      "loss": 1.0274,
      "step": 53
    },
    {
      "epoch": 1.1912568306010929,
      "grad_norm": 0.03897716477513313,
      "learning_rate": 8.49050701740412e-05,
      "loss": 1.041,
      "step": 54
    },
    {
      "epoch": 1.2131147540983607,
      "grad_norm": 0.042398180812597275,
      "learning_rate": 8.362593266945242e-05,
      "loss": 1.0944,
      "step": 55
    },
    {
      "epoch": 1.2349726775956285,
      "grad_norm": 0.04643435403704643,
      "learning_rate": 8.233111502839728e-05,
      "loss": 1.1559,
      "step": 56
    },
    {
      "epoch": 1.2568306010928962,
      "grad_norm": 0.06617248058319092,
      "learning_rate": 8.102140953100746e-05,
      "loss": 1.1503,
      "step": 57
    },
    {
      "epoch": 1.278688524590164,
      "grad_norm": 0.03723934665322304,
      "learning_rate": 7.969761756707802e-05,
      "loss": 0.7836,
      "step": 58
    },
    {
      "epoch": 1.3005464480874318,
      "grad_norm": 0.04756947606801987,
      "learning_rate": 7.83605491457085e-05,
      "loss": 1.0577,
      "step": 59
    },
    {
      "epoch": 1.3224043715846996,
      "grad_norm": 0.04186735302209854,
      "learning_rate": 7.701102239967025e-05,
      "loss": 0.9977,
      "step": 60
    },
    {
      "epoch": 1.3442622950819672,
      "grad_norm": 0.04028384014964104,
      "learning_rate": 7.564986308480269e-05,
      "loss": 1.0792,
      "step": 61
    },
    {
      "epoch": 1.366120218579235,
      "grad_norm": 0.04677554965019226,
      "learning_rate": 7.42779040747454e-05,
      "loss": 1.1321,
      "step": 62
    },
    {
      "epoch": 1.3879781420765027,
      "grad_norm": 0.06085206940770149,
      "learning_rate": 7.289598485131474e-05,
      "loss": 1.1295,
      "step": 63
    },
    {
      "epoch": 1.4098360655737705,
      "grad_norm": 0.039397455751895905,
      "learning_rate": 7.15049509908372e-05,
      "loss": 0.429,
      "step": 64
    },
    {
      "epoch": 1.4316939890710383,
      "grad_norm": 0.06355661898851395,
      "learning_rate": 7.010565364675344e-05,
      "loss": 1.4871,
      "step": 65
    },
    {
      "epoch": 1.453551912568306,
      "grad_norm": 0.037176258862018585,
      "learning_rate": 6.869894902880984e-05,
      "loss": 0.969,
      "step": 66
    },
    {
      "epoch": 1.4754098360655736,
      "grad_norm": 0.038282133638858795,
      "learning_rate": 6.728569787915627e-05,
      "loss": 1.072,
      "step": 67
    },
    {
      "epoch": 1.4972677595628414,
      "grad_norm": 0.044967859983444214,
      "learning_rate": 6.586676494567028e-05,
      "loss": 1.0984,
      "step": 68
    },
    {
      "epoch": 1.5191256830601092,
      "grad_norm": 0.05643809214234352,
      "learning_rate": 6.444301845283067e-05,
      "loss": 1.1205,
      "step": 69
    },
    {
      "epoch": 1.540983606557377,
      "grad_norm": 0.09565304219722748,
      "learning_rate": 6.301532957046325e-05,
      "loss": 1.1622,
      "step": 70
    },
    {
      "epoch": 1.5628415300546448,
      "grad_norm": 0.04223218932747841,
      "learning_rate": 6.15845718806849e-05,
      "loss": 0.9231,
      "step": 71
    },
    {
      "epoch": 1.5846994535519126,
      "grad_norm": 0.040753450244665146,
      "learning_rate": 6.01516208433711e-05,
      "loss": 0.9777,
      "step": 72
    },
    {
      "epoch": 1.6065573770491803,
      "grad_norm": 0.03819667920470238,
      "learning_rate": 5.871735326047505e-05,
      "loss": 1.0239,
      "step": 73
    },
    {
      "epoch": 1.6284153005464481,
      "grad_norm": 0.04432765766978264,
      "learning_rate": 5.728264673952495e-05,
      "loss": 1.0698,
      "step": 74
    },
    {
      "epoch": 1.650273224043716,
      "grad_norm": 0.05093759670853615,
      "learning_rate": 5.58483791566289e-05,
      "loss": 1.0708,
      "step": 75
    },
    {
      "epoch": 1.6721311475409837,
      "grad_norm": 0.07523038983345032,
      "learning_rate": 5.441542811931513e-05,
      "loss": 1.118,
      "step": 76
    },
    {
      "epoch": 1.6939890710382515,
      "grad_norm": 0.03937802463769913,
      "learning_rate": 5.298467042953676e-05,
      "loss": 0.895,
      "step": 77
    },
    {
      "epoch": 1.7158469945355193,
      "grad_norm": 0.046086255460977554,
      "learning_rate": 5.1556981547169334e-05,
      "loss": 1.0295,
      "step": 78
    },
    {
      "epoch": 1.737704918032787,
      "grad_norm": 0.044464047998189926,
      "learning_rate": 5.013323505432971e-05,
      "loss": 1.0139,
      "step": 79
    },
    {
      "epoch": 1.7595628415300546,
      "grad_norm": 0.049823347479104996,
      "learning_rate": 4.871430212084374e-05,
      "loss": 1.0398,
      "step": 80
    },
    {
      "epoch": 1.7814207650273224,
      "grad_norm": 0.05059857666492462,
      "learning_rate": 4.730105097119016e-05,
      "loss": 1.1453,
      "step": 81
    },
    {
      "epoch": 1.8032786885245902,
      "grad_norm": 0.0664261057972908,
      "learning_rate": 4.5894346353246564e-05,
      "loss": 1.0989,
      "step": 82
    },
    {
      "epoch": 1.825136612021858,
      "grad_norm": 0.035497602075338364,
      "learning_rate": 4.44950490091628e-05,
      "loss": 0.7209,
      "step": 83
    },
    {
      "epoch": 1.8469945355191257,
      "grad_norm": 0.048219550400972366,
      "learning_rate": 4.310401514868527e-05,
      "loss": 1.1382,
      "step": 84
    },
    {
      "epoch": 1.8688524590163933,
      "grad_norm": 0.04112359508872032,
      "learning_rate": 4.1722095925254615e-05,
      "loss": 0.9578,
      "step": 85
    },
    {
      "epoch": 1.890710382513661,
      "grad_norm": 0.04152638092637062,
      "learning_rate": 4.0350136915197304e-05,
      "loss": 1.042,
      "step": 86
    },
    {
      "epoch": 1.9125683060109289,
      "grad_norm": 0.044837482273578644,
      "learning_rate": 3.898897760032974e-05,
      "loss": 1.0759,
      "step": 87
    },
    {
      "epoch": 1.9344262295081966,
      "grad_norm": 0.05834497883915901,
      "learning_rate": 3.76394508542915e-05,
      "loss": 1.0805,
      "step": 88
    },
    {
      "epoch": 1.9562841530054644,
      "grad_norm": 0.045810725539922714,
      "learning_rate": 3.6302382432922e-05,
      "loss": 0.5017,
      "step": 89
    },
    {
      "epoch": 1.9781420765027322,
      "grad_norm": 0.056167762726545334,
      "learning_rate": 3.497859046899255e-05,
      "loss": 1.4732,
      "step": 90
    },
    {
      "epoch": 2.010928961748634,
      "grad_norm": 0.10958977788686752,
      "learning_rate": 3.366888497160273e-05,
      "loss": 1.8592,
      "step": 91
    },
    {
      "epoch": 2.0327868852459017,
      "grad_norm": 0.03569335490465164,
      "learning_rate": 3.2374067330547576e-05,
      "loss": 0.8658,
      "step": 92
    },
    {
      "epoch": 2.0546448087431695,
      "grad_norm": 0.03896716982126236,
      "learning_rate": 3.109492982595882e-05,
      "loss": 0.9701,
      "step": 93
    },
    {
      "epoch": 2.0765027322404372,
      "grad_norm": 0.0439588725566864,
      "learning_rate": 2.9832255143520147e-05,
      "loss": 1.0359,
      "step": 94
    },
    {
      "epoch": 2.098360655737705,
      "grad_norm": 0.05064794421195984,
      "learning_rate": 2.8586815895553156e-05,
      "loss": 1.1003,
      "step": 95
    },
    {
      "epoch": 2.120218579234973,
      "grad_norm": 0.07673317193984985,
      "learning_rate": 2.735937414826714e-05,
      "loss": 1.0672,
      "step": 96
    },
    {
      "epoch": 2.1420765027322406,
      "grad_norm": 0.055169906467199326,
      "learning_rate": 2.6150680955461813e-05,
      "loss": 0.83,
      "step": 97
    },
    {
      "epoch": 2.1639344262295084,
      "grad_norm": 0.04568566754460335,
      "learning_rate": 2.4961475898968298e-05,
      "loss": 1.0704,
      "step": 98
    },
    {
      "epoch": 2.185792349726776,
      "grad_norm": 0.0388328842818737,
      "learning_rate": 2.3792486636109876e-05,
      "loss": 0.9818,
      "step": 99
    },
    {
      "epoch": 2.2076502732240435,
      "grad_norm": 0.0437370240688324,
      "learning_rate": 2.2644428454458946e-05,
      "loss": 1.0655,
      "step": 100
    },
    {
      "epoch": 2.2076502732240435,
      "eval_loss": 1.041056513786316,
      "eval_runtime": 1.9941,
      "eval_samples_per_second": 308.415,
      "eval_steps_per_second": 10.03,
      "step": 100
    },
    {
      "epoch": 2.2295081967213113,
      "grad_norm": 0.04856366664171219,
      "learning_rate": 2.1518003834162954e-05,
      "loss": 1.0995,
      "step": 101
    },
    {
      "epoch": 2.251366120218579,
      "grad_norm": 0.0619901567697525,
      "learning_rate": 2.0413902018106895e-05,
      "loss": 1.0795,
      "step": 102
    },
    {
      "epoch": 2.273224043715847,
      "grad_norm": 0.053943440318107605,
      "learning_rate": 1.9332798590175797e-05,
      "loss": 0.5584,
      "step": 103
    },
    {
      "epoch": 2.2950819672131146,
      "grad_norm": 0.06046655401587486,
      "learning_rate": 1.8275355061874515e-05,
      "loss": 1.3933,
      "step": 104
    },
    {
      "epoch": 2.3169398907103824,
      "grad_norm": 0.04138614237308502,
      "learning_rate": 1.724221846755858e-05,
      "loss": 0.9474,
      "step": 105
    },
    {
      "epoch": 2.33879781420765,
      "grad_norm": 0.0419883206486702,
      "learning_rate": 1.623402096852318e-05,
      "loss": 1.0178,
      "step": 106
    },
    {
      "epoch": 2.360655737704918,
      "grad_norm": 0.04966486990451813,
      "learning_rate": 1.5251379466192902e-05,
      "loss": 1.1369,
      "step": 107
    },
    {
      "epoch": 2.3825136612021858,
      "grad_norm": 0.05595370754599571,
      "learning_rate": 1.4294895224648664e-05,
      "loss": 1.1341,
      "step": 108
    },
    {
      "epoch": 2.4043715846994536,
      "grad_norm": 0.05111997202038765,
      "learning_rate": 1.3365153502722967e-05,
      "loss": 0.6285,
      "step": 109
    },
    {
      "epoch": 2.4262295081967213,
      "grad_norm": 0.052137341350317,
      "learning_rate": 1.2462723195888415e-05,
      "loss": 1.2566,
      "step": 110
    },
    {
      "epoch": 2.448087431693989,
      "grad_norm": 0.03865412250161171,
      "learning_rate": 1.1588156488159008e-05,
      "loss": 0.9759,
      "step": 111
    },
    {
      "epoch": 2.469945355191257,
      "grad_norm": 0.03843948617577553,
      "learning_rate": 1.074198851421659e-05,
      "loss": 1.0035,
      "step": 112
    },
    {
      "epoch": 2.4918032786885247,
      "grad_norm": 0.04497023671865463,
      "learning_rate": 9.924737031969744e-06,
      "loss": 1.0914,
      "step": 113
    },
    {
      "epoch": 2.5136612021857925,
      "grad_norm": 0.05429847911000252,
      "learning_rate": 9.136902105745273e-06,
      "loss": 1.1209,
      "step": 114
    },
    {
      "epoch": 2.5355191256830603,
      "grad_norm": 0.08493578433990479,
      "learning_rate": 8.378965800306078e-06,
      "loss": 1.1134,
      "step": 115
    },
    {
      "epoch": 2.557377049180328,
      "grad_norm": 0.06231605261564255,
      "learning_rate": 7.651391885882701e-06,
      "loss": 0.965,
      "step": 116
    },
    {
      "epoch": 2.579234972677596,
      "grad_norm": 0.03692341595888138,
      "learning_rate": 6.954625554399086e-06,
      "loss": 0.8894,
      "step": 117
    },
    {
      "epoch": 2.6010928961748636,
      "grad_norm": 0.04275006055831909,
      "learning_rate": 6.289093147066023e-06,
      "loss": 1.0013,
      "step": 118
    },
    {
      "epoch": 2.6229508196721314,
      "grad_norm": 0.04334869980812073,
      "learning_rate": 5.655201893509272e-06,
      "loss": 1.0516,
      "step": 119
    },
    {
      "epoch": 2.644808743169399,
      "grad_norm": 0.052981842309236526,
      "learning_rate": 5.053339662591549e-06,
      "loss": 1.0457,
      "step": 120
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.0736251100897789,
      "learning_rate": 4.483874725081219e-06,
      "loss": 1.1736,
      "step": 121
    },
    {
      "epoch": 2.6885245901639343,
      "grad_norm": 0.057101909071207047,
      "learning_rate": 3.9471555283128005e-06,
      "loss": 0.8181,
      "step": 122
    },
    {
      "epoch": 2.710382513661202,
      "grad_norm": 0.04115651920437813,
      "learning_rate": 3.4435104829770587e-06,
      "loss": 1.0691,
      "step": 123
    },
    {
      "epoch": 2.73224043715847,
      "grad_norm": 0.038360998034477234,
      "learning_rate": 2.9732477621712853e-06,
      "loss": 0.986,
      "step": 124
    },
    {
      "epoch": 2.7540983606557377,
      "grad_norm": 0.0409964919090271,
      "learning_rate": 2.53665511283261e-06,
      "loss": 1.0381,
      "step": 125
    },
    {
      "epoch": 2.7759562841530054,
      "grad_norm": 0.04703905060887337,
      "learning_rate": 2.1339996796698887e-06,
      "loss": 1.0692,
      "step": 126
    },
    {
      "epoch": 2.797814207650273,
      "grad_norm": 0.06132422015070915,
      "learning_rate": 1.7655278417016956e-06,
      "loss": 1.117,
      "step": 127
    },
    {
      "epoch": 2.819672131147541,
      "grad_norm": 0.05300451070070267,
      "learning_rate": 1.4314650615005687e-06,
      "loss": 0.543,
      "step": 128
    },
    {
      "epoch": 2.841530054644809,
      "grad_norm": 0.055577926337718964,
      "learning_rate": 1.1320157472357307e-06,
      "loss": 1.3244,
      "step": 129
    },
    {
      "epoch": 2.8633879781420766,
      "grad_norm": 0.04094787687063217,
      "learning_rate": 8.673631275987297e-07,
      "loss": 0.9801,
      "step": 130
    },
    {
      "epoch": 2.8852459016393444,
      "grad_norm": 0.0408557653427124,
      "learning_rate": 6.376691396884168e-07,
      "loss": 1.0152,
      "step": 131
    },
    {
      "epoch": 2.907103825136612,
      "grad_norm": 0.047284748405218124,
      "learning_rate": 4.430743299240307e-07,
      "loss": 1.0816,
      "step": 132
    },
    {
      "epoch": 2.92896174863388,
      "grad_norm": 0.055055923759937286,
      "learning_rate": 2.836977680468222e-07,
      "loss": 1.0597,
      "step": 133
    },
    {
      "epoch": 2.9508196721311473,
      "grad_norm": 0.05081977695226669,
      "learning_rate": 1.5963697426306723e-07,
      "loss": 0.6892,
      "step": 134
    },
    {
      "epoch": 2.972677595628415,
      "grad_norm": 0.055216483771800995,
      "learning_rate": 7.096785957284602e-08,
      "loss": 1.341,
      "step": 135
    },
    {
      "epoch": 3.0054644808743167,
      "grad_norm": 0.10709080845117569,
      "learning_rate": 1.774467932117818e-08,
      "loss": 1.761,
      "step": 136
    },
    {
      "epoch": 3.0273224043715845,
      "grad_norm": 0.03700735419988632,
      "learning_rate": 0.0,
      "loss": 0.8896,
      "step": 137
    }
  ],
  "logging_steps": 1,
  "max_steps": 137,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.74466163399721e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}