diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,15607 @@ +{ + "best_metric": 0.07300831, + "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v0-20250101-185154/checkpoint-6036", + "epoch": 14.0, + "eval_steps": 200, + "global_step": 7759, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "acc": 0.66110599, + "epoch": 0.0003313452617627568, + "grad_norm": 50.27714337190847, + "learning_rate": 0.0, + "loss": 3.7548461, + "memory(GiB)": 31.98, + "step": 1, + "train_speed(iter/s)": 0.051634 + }, + { + "acc": 0.65853631, + "epoch": 0.0016567263088137839, + "grad_norm": 59.248621100065584, + "learning_rate": 2.0086965557947446e-06, + "loss": 3.62318873, + "memory(GiB)": 33.01, + "step": 5, + "train_speed(iter/s)": 0.124439 + }, + { + "acc": 0.7222682, + "epoch": 0.0033134526176275677, + "grad_norm": 21.791730292901168, + "learning_rate": 2.8737950746583047e-06, + "loss": 1.76198959, + "memory(GiB)": 33.01, + "step": 10, + "train_speed(iter/s)": 0.15095 + }, + { + "acc": 0.77555671, + "epoch": 0.004970178926441352, + "grad_norm": 18.163931905418693, + "learning_rate": 3.379845267622901e-06, + "loss": 1.16682892, + "memory(GiB)": 33.01, + "step": 15, + "train_speed(iter/s)": 0.162546 + }, + { + "acc": 0.79614887, + "epoch": 0.0066269052352551355, + "grad_norm": 23.170639029991356, + "learning_rate": 3.738893593521864e-06, + "loss": 1.05961266, + "memory(GiB)": 33.01, + "step": 20, + "train_speed(iter/s)": 0.16897 + }, + { + "acc": 0.79792585, + "epoch": 0.00828363154406892, + "grad_norm": 25.79687222394801, + "learning_rate": 4.017393111589489e-06, + "loss": 1.11055069, + "memory(GiB)": 33.01, + "step": 25, + "train_speed(iter/s)": 0.173184 + }, + { + "acc": 0.80801973, + "epoch": 0.009940357852882704, + "grad_norm": 15.051157865550348, + "learning_rate": 4.24494378648646e-06, + "loss": 0.99607706, + "memory(GiB)": 33.01, + "step": 30, + "train_speed(iter/s)": 0.176052 + }, + { + "acc": 0.82084551, + "epoch": 0.011597084161696488, + "grad_norm": 10.223050479974178, + "learning_rate": 4.437335140791102e-06, + "loss": 0.95394745, + "memory(GiB)": 33.01, + "step": 35, + "train_speed(iter/s)": 0.17821 + }, + { + "acc": 0.83056355, + "epoch": 0.013253810470510271, + "grad_norm": 11.864398422104427, + "learning_rate": 4.603992112385424e-06, + "loss": 0.87341337, + "memory(GiB)": 33.01, + "step": 40, + "train_speed(iter/s)": 0.179859 + }, + { + "acc": 0.81267252, + "epoch": 0.014910536779324055, + "grad_norm": 12.40283491772095, + "learning_rate": 4.750993979451057e-06, + "loss": 0.95986633, + "memory(GiB)": 33.01, + "step": 45, + "train_speed(iter/s)": 0.181182 + }, + { + "acc": 0.83013372, + "epoch": 0.01656726308813784, + "grad_norm": 10.434349025985295, + "learning_rate": 4.882491630453049e-06, + "loss": 0.87351093, + "memory(GiB)": 33.01, + "step": 50, + "train_speed(iter/s)": 0.182219 + }, + { + "acc": 0.83825665, + "epoch": 0.018223989396951624, + "grad_norm": 9.483246391000499, + "learning_rate": 5.001445725187638e-06, + "loss": 0.84431524, + "memory(GiB)": 33.01, + "step": 55, + "train_speed(iter/s)": 0.183104 + }, + { + "acc": 0.81640253, + "epoch": 0.019880715705765408, + "grad_norm": 10.776285793721472, + "learning_rate": 5.11004230535002e-06, + "loss": 0.95539846, + "memory(GiB)": 33.01, + "step": 60, + "train_speed(iter/s)": 0.183885 + }, + { + "acc": 0.82131834, + "epoch": 0.021537442014579192, + "grad_norm": 8.627366353102206, + "learning_rate": 5.209941475102492e-06, + "loss": 0.92218132, + "memory(GiB)": 33.01, + "step": 65, + "train_speed(iter/s)": 0.184523 + }, + { + "acc": 0.81877251, + "epoch": 0.023194168323392977, + "grad_norm": 7.73227486810335, + "learning_rate": 5.302433659654663e-06, + "loss": 0.91788635, + "memory(GiB)": 33.01, + "step": 70, + "train_speed(iter/s)": 0.185058 + }, + { + "acc": 0.83363972, + "epoch": 0.02485089463220676, + "grad_norm": 10.385061722217497, + "learning_rate": 5.388541823417645e-06, + "loss": 0.89225368, + "memory(GiB)": 33.01, + "step": 75, + "train_speed(iter/s)": 0.185511 + }, + { + "acc": 0.8337285, + "epoch": 0.026507620941020542, + "grad_norm": 7.860817957230683, + "learning_rate": 5.469090631248983e-06, + "loss": 0.87068348, + "memory(GiB)": 33.01, + "step": 80, + "train_speed(iter/s)": 0.185937 + }, + { + "acc": 0.84761944, + "epoch": 0.028164347249834326, + "grad_norm": 9.115430859362677, + "learning_rate": 5.5447546056702515e-06, + "loss": 0.81180515, + "memory(GiB)": 33.01, + "step": 85, + "train_speed(iter/s)": 0.186316 + }, + { + "acc": 0.84264908, + "epoch": 0.02982107355864811, + "grad_norm": 13.663567397042407, + "learning_rate": 5.616092498314617e-06, + "loss": 0.88545876, + "memory(GiB)": 33.01, + "step": 90, + "train_speed(iter/s)": 0.186635 + }, + { + "acc": 0.84530535, + "epoch": 0.031477799867461895, + "grad_norm": 7.4754005630581135, + "learning_rate": 5.683572355914555e-06, + "loss": 0.82264423, + "memory(GiB)": 33.01, + "step": 95, + "train_speed(iter/s)": 0.186926 + }, + { + "acc": 0.84374695, + "epoch": 0.03313452617627568, + "grad_norm": 11.99236287767985, + "learning_rate": 5.7475901493166095e-06, + "loss": 0.78904347, + "memory(GiB)": 33.01, + "step": 100, + "train_speed(iter/s)": 0.187211 + }, + { + "acc": 0.84980297, + "epoch": 0.03479125248508946, + "grad_norm": 7.905497376563473, + "learning_rate": 5.808483852619259e-06, + "loss": 0.77175064, + "memory(GiB)": 33.01, + "step": 105, + "train_speed(iter/s)": 0.187455 + }, + { + "acc": 0.84676991, + "epoch": 0.03644797879390325, + "grad_norm": 8.403941397142672, + "learning_rate": 5.8665442440511985e-06, + "loss": 0.7824954, + "memory(GiB)": 33.01, + "step": 110, + "train_speed(iter/s)": 0.187681 + }, + { + "acc": 0.83980999, + "epoch": 0.03810470510271703, + "grad_norm": 8.971608842298808, + "learning_rate": 5.922023303967215e-06, + "loss": 0.83844948, + "memory(GiB)": 33.01, + "step": 115, + "train_speed(iter/s)": 0.187886 + }, + { + "acc": 0.83745537, + "epoch": 0.039761431411530816, + "grad_norm": 11.092540697396046, + "learning_rate": 5.97514082421358e-06, + "loss": 0.84876623, + "memory(GiB)": 33.01, + "step": 120, + "train_speed(iter/s)": 0.18808 + }, + { + "acc": 0.8322444, + "epoch": 0.0414181577203446, + "grad_norm": 3.396144727915799, + "learning_rate": 6.026089667384234e-06, + "loss": 0.85240536, + "memory(GiB)": 33.01, + "step": 125, + "train_speed(iter/s)": 0.18825 + }, + { + "acc": 0.83602228, + "epoch": 0.043074884029158385, + "grad_norm": 8.270916567528229, + "learning_rate": 6.075039993966052e-06, + "loss": 0.82148342, + "memory(GiB)": 33.01, + "step": 130, + "train_speed(iter/s)": 0.188408 + }, + { + "acc": 0.81821384, + "epoch": 0.04473161033797217, + "grad_norm": 7.753889762215579, + "learning_rate": 6.122142691279214e-06, + "loss": 0.88220882, + "memory(GiB)": 33.01, + "step": 135, + "train_speed(iter/s)": 0.188567 + }, + { + "acc": 0.84211292, + "epoch": 0.04638833664678595, + "grad_norm": 7.027352256083138, + "learning_rate": 6.167532178518222e-06, + "loss": 0.83544054, + "memory(GiB)": 33.01, + "step": 140, + "train_speed(iter/s)": 0.188713 + }, + { + "acc": 0.83703737, + "epoch": 0.04804506295559974, + "grad_norm": 6.462244441845264, + "learning_rate": 6.211328719346929e-06, + "loss": 0.80138245, + "memory(GiB)": 33.01, + "step": 145, + "train_speed(iter/s)": 0.188826 + }, + { + "acc": 0.85498161, + "epoch": 0.04970178926441352, + "grad_norm": 8.758579967575177, + "learning_rate": 6.253640342281205e-06, + "loss": 0.77901378, + "memory(GiB)": 33.01, + "step": 150, + "train_speed(iter/s)": 0.188819 + }, + { + "acc": 0.85058413, + "epoch": 0.051358515573227306, + "grad_norm": 17.338366738017733, + "learning_rate": 6.2945644460697445e-06, + "loss": 0.7858984, + "memory(GiB)": 33.01, + "step": 155, + "train_speed(iter/s)": 0.188946 + }, + { + "acc": 0.84381838, + "epoch": 0.053015241882041084, + "grad_norm": 5.435548438025349, + "learning_rate": 6.3341891501125445e-06, + "loss": 0.77058525, + "memory(GiB)": 33.01, + "step": 160, + "train_speed(iter/s)": 0.18896 + }, + { + "acc": 0.83954039, + "epoch": 0.05467196819085487, + "grad_norm": 4.399997862609139, + "learning_rate": 6.372594437015794e-06, + "loss": 0.80552349, + "memory(GiB)": 33.01, + "step": 165, + "train_speed(iter/s)": 0.189058 + }, + { + "acc": 0.85681477, + "epoch": 0.05632869449966865, + "grad_norm": 5.206696585986359, + "learning_rate": 6.409853124533811e-06, + "loss": 0.73895197, + "memory(GiB)": 33.01, + "step": 170, + "train_speed(iter/s)": 0.189155 + }, + { + "acc": 0.85199137, + "epoch": 0.05798542080848244, + "grad_norm": 4.4333768843681725, + "learning_rate": 6.446031696585847e-06, + "loss": 0.77676201, + "memory(GiB)": 33.01, + "step": 175, + "train_speed(iter/s)": 0.189245 + }, + { + "acc": 0.85372562, + "epoch": 0.05964214711729622, + "grad_norm": 17.001757223516282, + "learning_rate": 6.4811910171781766e-06, + "loss": 0.76763196, + "memory(GiB)": 33.01, + "step": 180, + "train_speed(iter/s)": 0.189335 + }, + { + "acc": 0.84696083, + "epoch": 0.061298873426110005, + "grad_norm": 7.799436298527937, + "learning_rate": 6.515386946489136e-06, + "loss": 0.76351337, + "memory(GiB)": 33.01, + "step": 185, + "train_speed(iter/s)": 0.189422 + }, + { + "acc": 0.85822506, + "epoch": 0.06295559973492379, + "grad_norm": 11.328352996137795, + "learning_rate": 6.548670874778115e-06, + "loss": 0.79490757, + "memory(GiB)": 33.01, + "step": 190, + "train_speed(iter/s)": 0.189494 + }, + { + "acc": 0.85514393, + "epoch": 0.06461232604373758, + "grad_norm": 6.912811704629352, + "learning_rate": 6.581090186930649e-06, + "loss": 0.7586585, + "memory(GiB)": 33.01, + "step": 195, + "train_speed(iter/s)": 0.189567 + }, + { + "acc": 0.84247036, + "epoch": 0.06626905235255136, + "grad_norm": 5.22561576153318, + "learning_rate": 6.612688668180168e-06, + "loss": 0.811413, + "memory(GiB)": 33.01, + "step": 200, + "train_speed(iter/s)": 0.189625 + }, + { + "acc": 0.84955778, + "epoch": 0.06792577866136515, + "grad_norm": 7.588959185273035, + "learning_rate": 6.643506859724346e-06, + "loss": 0.76285973, + "memory(GiB)": 33.01, + "step": 205, + "train_speed(iter/s)": 0.189696 + }, + { + "acc": 0.84231005, + "epoch": 0.06958250497017893, + "grad_norm": 5.213604633751543, + "learning_rate": 6.6735823714828186e-06, + "loss": 0.76322999, + "memory(GiB)": 33.01, + "step": 210, + "train_speed(iter/s)": 0.189766 + }, + { + "acc": 0.8555872, + "epoch": 0.0712392312789927, + "grad_norm": 5.17769941057491, + "learning_rate": 6.702950158049068e-06, + "loss": 0.73766384, + "memory(GiB)": 33.01, + "step": 215, + "train_speed(iter/s)": 0.189824 + }, + { + "acc": 0.85509701, + "epoch": 0.0728959575878065, + "grad_norm": 9.338793341973062, + "learning_rate": 6.731642762914758e-06, + "loss": 0.74758358, + "memory(GiB)": 33.01, + "step": 220, + "train_speed(iter/s)": 0.18988 + }, + { + "acc": 0.85551167, + "epoch": 0.07455268389662027, + "grad_norm": 7.799786407663588, + "learning_rate": 6.759690535245802e-06, + "loss": 0.72227631, + "memory(GiB)": 33.01, + "step": 225, + "train_speed(iter/s)": 0.189939 + }, + { + "acc": 0.85811892, + "epoch": 0.07620941020543406, + "grad_norm": 5.997515776481016, + "learning_rate": 6.7871218228307745e-06, + "loss": 0.72223263, + "memory(GiB)": 33.01, + "step": 230, + "train_speed(iter/s)": 0.18999 + }, + { + "acc": 0.85784416, + "epoch": 0.07786613651424784, + "grad_norm": 8.41060544225861, + "learning_rate": 6.813963144277111e-06, + "loss": 0.77040291, + "memory(GiB)": 33.01, + "step": 235, + "train_speed(iter/s)": 0.190035 + }, + { + "acc": 0.85461082, + "epoch": 0.07952286282306163, + "grad_norm": 8.220098968672605, + "learning_rate": 6.84023934307714e-06, + "loss": 0.75113549, + "memory(GiB)": 33.01, + "step": 240, + "train_speed(iter/s)": 0.190082 + }, + { + "acc": 0.84146032, + "epoch": 0.08117958913187541, + "grad_norm": 5.976461123958971, + "learning_rate": 6.865973725787461e-06, + "loss": 0.78689213, + "memory(GiB)": 33.01, + "step": 245, + "train_speed(iter/s)": 0.190128 + }, + { + "acc": 0.85554123, + "epoch": 0.0828363154406892, + "grad_norm": 3.9102866660697404, + "learning_rate": 6.891188186247792e-06, + "loss": 0.73804913, + "memory(GiB)": 33.01, + "step": 250, + "train_speed(iter/s)": 0.190174 + }, + { + "acc": 0.85871716, + "epoch": 0.08449304174950298, + "grad_norm": 10.08654606800113, + "learning_rate": 6.915903317498407e-06, + "loss": 0.72549114, + "memory(GiB)": 33.01, + "step": 255, + "train_speed(iter/s)": 0.190215 + }, + { + "acc": 0.83968134, + "epoch": 0.08614976805831677, + "grad_norm": 7.7105875410612565, + "learning_rate": 6.940138512829612e-06, + "loss": 0.84207258, + "memory(GiB)": 33.01, + "step": 260, + "train_speed(iter/s)": 0.190248 + }, + { + "acc": 0.85006838, + "epoch": 0.08780649436713055, + "grad_norm": 5.088348470726073, + "learning_rate": 6.963912057205657e-06, + "loss": 0.78956671, + "memory(GiB)": 33.01, + "step": 265, + "train_speed(iter/s)": 0.190286 + }, + { + "acc": 0.8407608, + "epoch": 0.08946322067594434, + "grad_norm": 7.157954060997024, + "learning_rate": 6.987241210142773e-06, + "loss": 0.80997934, + "memory(GiB)": 33.01, + "step": 270, + "train_speed(iter/s)": 0.190318 + }, + { + "acc": 0.84329042, + "epoch": 0.09111994698475812, + "grad_norm": 4.080799357922467, + "learning_rate": 7.010142280982383e-06, + "loss": 0.78907938, + "memory(GiB)": 33.01, + "step": 275, + "train_speed(iter/s)": 0.19035 + }, + { + "acc": 0.8445241, + "epoch": 0.0927766732935719, + "grad_norm": 4.975765769175333, + "learning_rate": 7.0326306973817816e-06, + "loss": 0.79310322, + "memory(GiB)": 33.01, + "step": 280, + "train_speed(iter/s)": 0.190378 + }, + { + "acc": 0.82853336, + "epoch": 0.09443339960238568, + "grad_norm": 3.8534560662201405, + "learning_rate": 7.054721067742711e-06, + "loss": 0.82995491, + "memory(GiB)": 33.01, + "step": 285, + "train_speed(iter/s)": 0.19042 + }, + { + "acc": 0.84708948, + "epoch": 0.09609012591119948, + "grad_norm": 7.862309897547912, + "learning_rate": 7.076427238210489e-06, + "loss": 0.78673692, + "memory(GiB)": 33.01, + "step": 290, + "train_speed(iter/s)": 0.190458 + }, + { + "acc": 0.85356722, + "epoch": 0.09774685222001325, + "grad_norm": 7.267843653288115, + "learning_rate": 7.097762344800689e-06, + "loss": 0.74209976, + "memory(GiB)": 33.01, + "step": 295, + "train_speed(iter/s)": 0.190496 + }, + { + "acc": 0.85457726, + "epoch": 0.09940357852882704, + "grad_norm": 4.996312399649092, + "learning_rate": 7.118738861144765e-06, + "loss": 0.73887339, + "memory(GiB)": 33.01, + "step": 300, + "train_speed(iter/s)": 0.190532 + }, + { + "acc": 0.85430183, + "epoch": 0.10106030483764082, + "grad_norm": 9.04522225013425, + "learning_rate": 7.13936864228921e-06, + "loss": 0.73701906, + "memory(GiB)": 33.01, + "step": 305, + "train_speed(iter/s)": 0.190558 + }, + { + "acc": 0.84619389, + "epoch": 0.10271703114645461, + "grad_norm": 3.602019206226853, + "learning_rate": 7.159662964933303e-06, + "loss": 0.76839342, + "memory(GiB)": 33.01, + "step": 310, + "train_speed(iter/s)": 0.190594 + }, + { + "acc": 0.85931883, + "epoch": 0.10437375745526839, + "grad_norm": 6.644233975861472, + "learning_rate": 7.179632564447415e-06, + "loss": 0.72944298, + "memory(GiB)": 33.01, + "step": 315, + "train_speed(iter/s)": 0.190628 + }, + { + "acc": 0.85696793, + "epoch": 0.10603048376408217, + "grad_norm": 6.89152177234198, + "learning_rate": 7.199287668976103e-06, + "loss": 0.71098285, + "memory(GiB)": 33.01, + "step": 320, + "train_speed(iter/s)": 0.19066 + }, + { + "acc": 0.84302912, + "epoch": 0.10768721007289596, + "grad_norm": 13.225574415600349, + "learning_rate": 7.218638030897237e-06, + "loss": 0.8497488, + "memory(GiB)": 33.01, + "step": 325, + "train_speed(iter/s)": 0.190693 + }, + { + "acc": 0.85180035, + "epoch": 0.10934393638170974, + "grad_norm": 9.01186226545562, + "learning_rate": 7.237692955879354e-06, + "loss": 0.78130593, + "memory(GiB)": 33.01, + "step": 330, + "train_speed(iter/s)": 0.190723 + }, + { + "acc": 0.84967728, + "epoch": 0.11100066269052353, + "grad_norm": 4.417536183486161, + "learning_rate": 7.256461329754014e-06, + "loss": 0.72536817, + "memory(GiB)": 33.01, + "step": 335, + "train_speed(iter/s)": 0.190753 + }, + { + "acc": 0.87272768, + "epoch": 0.1126573889993373, + "grad_norm": 5.571498245437071, + "learning_rate": 7.274951643397372e-06, + "loss": 0.68704929, + "memory(GiB)": 33.01, + "step": 340, + "train_speed(iter/s)": 0.190773 + }, + { + "acc": 0.86904411, + "epoch": 0.1143141153081511, + "grad_norm": 5.480038545049546, + "learning_rate": 7.293172015795371e-06, + "loss": 0.64733562, + "memory(GiB)": 33.01, + "step": 345, + "train_speed(iter/s)": 0.1908 + }, + { + "acc": 0.88172531, + "epoch": 0.11597084161696487, + "grad_norm": 4.423695596891556, + "learning_rate": 7.311130215449407e-06, + "loss": 0.63284264, + "memory(GiB)": 33.01, + "step": 350, + "train_speed(iter/s)": 0.190817 + }, + { + "acc": 0.86758823, + "epoch": 0.11762756792577866, + "grad_norm": 11.959440699921517, + "learning_rate": 7.328833680263689e-06, + "loss": 0.71492028, + "memory(GiB)": 33.01, + "step": 355, + "train_speed(iter/s)": 0.190848 + }, + { + "acc": 0.85512447, + "epoch": 0.11928429423459244, + "grad_norm": 11.531180840267398, + "learning_rate": 7.346289536041736e-06, + "loss": 0.74734936, + "memory(GiB)": 33.01, + "step": 360, + "train_speed(iter/s)": 0.190867 + }, + { + "acc": 0.86098652, + "epoch": 0.12094102054340623, + "grad_norm": 6.38954821051966, + "learning_rate": 7.363504613707136e-06, + "loss": 0.6976438, + "memory(GiB)": 33.01, + "step": 365, + "train_speed(iter/s)": 0.190887 + }, + { + "acc": 0.85538502, + "epoch": 0.12259774685222001, + "grad_norm": 7.489897904281903, + "learning_rate": 7.380485465352696e-06, + "loss": 0.72213078, + "memory(GiB)": 33.01, + "step": 370, + "train_speed(iter/s)": 0.19091 + }, + { + "acc": 0.85435352, + "epoch": 0.1242544731610338, + "grad_norm": 6.5745061795079955, + "learning_rate": 7.397238379212389e-06, + "loss": 0.7297389, + "memory(GiB)": 33.01, + "step": 375, + "train_speed(iter/s)": 0.190928 + }, + { + "acc": 0.84575977, + "epoch": 0.12591119946984758, + "grad_norm": 6.3145621197054895, + "learning_rate": 7.413769393641675e-06, + "loss": 0.76131997, + "memory(GiB)": 33.01, + "step": 380, + "train_speed(iter/s)": 0.190915 + }, + { + "acc": 0.85743666, + "epoch": 0.12756792577866136, + "grad_norm": 5.939012809105342, + "learning_rate": 7.430084310183997e-06, + "loss": 0.71653161, + "memory(GiB)": 33.01, + "step": 385, + "train_speed(iter/s)": 0.190927 + }, + { + "acc": 0.84744682, + "epoch": 0.12922465208747516, + "grad_norm": 7.54414322312517, + "learning_rate": 7.4461887057942085e-06, + "loss": 0.7426198, + "memory(GiB)": 33.01, + "step": 390, + "train_speed(iter/s)": 0.19092 + }, + { + "acc": 0.84886322, + "epoch": 0.13088137839628894, + "grad_norm": 8.937659297884245, + "learning_rate": 7.46208794428338e-06, + "loss": 0.75673151, + "memory(GiB)": 33.01, + "step": 395, + "train_speed(iter/s)": 0.190942 + }, + { + "acc": 0.84283085, + "epoch": 0.13253810470510272, + "grad_norm": 7.224979630512225, + "learning_rate": 7.477787187043728e-06, + "loss": 0.76008615, + "memory(GiB)": 33.01, + "step": 400, + "train_speed(iter/s)": 0.190961 + }, + { + "acc": 0.85820484, + "epoch": 0.1341948310139165, + "grad_norm": 6.728197096262106, + "learning_rate": 7.493291403107369e-06, + "loss": 0.73872232, + "memory(GiB)": 33.01, + "step": 405, + "train_speed(iter/s)": 0.190981 + }, + { + "acc": 0.83822203, + "epoch": 0.1358515573227303, + "grad_norm": 11.090670435976039, + "learning_rate": 7.5086053785879055e-06, + "loss": 0.83083191, + "memory(GiB)": 33.01, + "step": 410, + "train_speed(iter/s)": 0.190997 + }, + { + "acc": 0.83120193, + "epoch": 0.13750828363154408, + "grad_norm": 7.014092801931885, + "learning_rate": 7.52373372554976e-06, + "loss": 0.81330185, + "memory(GiB)": 33.01, + "step": 415, + "train_speed(iter/s)": 0.191013 + }, + { + "acc": 0.8598774, + "epoch": 0.13916500994035785, + "grad_norm": 5.396661691435741, + "learning_rate": 7.538680890346378e-06, + "loss": 0.73769159, + "memory(GiB)": 33.01, + "step": 420, + "train_speed(iter/s)": 0.191023 + }, + { + "acc": 0.85436583, + "epoch": 0.14082173624917163, + "grad_norm": 5.4805303609602625, + "learning_rate": 7.5534511614649965e-06, + "loss": 0.74521694, + "memory(GiB)": 33.01, + "step": 425, + "train_speed(iter/s)": 0.191036 + }, + { + "acc": 0.87025623, + "epoch": 0.1424784625579854, + "grad_norm": 5.209954254433347, + "learning_rate": 7.568048676912627e-06, + "loss": 0.69971952, + "memory(GiB)": 33.01, + "step": 430, + "train_speed(iter/s)": 0.191054 + }, + { + "acc": 0.84417782, + "epoch": 0.1441351888667992, + "grad_norm": 5.511804934678127, + "learning_rate": 7.582477431175086e-06, + "loss": 0.77695208, + "memory(GiB)": 33.01, + "step": 435, + "train_speed(iter/s)": 0.191071 + }, + { + "acc": 0.85898895, + "epoch": 0.145791915175613, + "grad_norm": 7.816283737518623, + "learning_rate": 7.596741281778317e-06, + "loss": 0.6926734, + "memory(GiB)": 33.01, + "step": 440, + "train_speed(iter/s)": 0.191083 + }, + { + "acc": 0.84782066, + "epoch": 0.14744864148442677, + "grad_norm": 6.60501392294181, + "learning_rate": 7.610843955479014e-06, + "loss": 0.78016205, + "memory(GiB)": 33.01, + "step": 445, + "train_speed(iter/s)": 0.191102 + }, + { + "acc": 0.85637875, + "epoch": 0.14910536779324055, + "grad_norm": 7.4952807869711675, + "learning_rate": 7.624789054109362e-06, + "loss": 0.71651783, + "memory(GiB)": 33.01, + "step": 450, + "train_speed(iter/s)": 0.191123 + }, + { + "acc": 0.86040745, + "epoch": 0.15076209410205435, + "grad_norm": 3.9434476633183064, + "learning_rate": 7.63858006009885e-06, + "loss": 0.67413173, + "memory(GiB)": 33.01, + "step": 455, + "train_speed(iter/s)": 0.191135 + }, + { + "acc": 0.86539726, + "epoch": 0.15241882041086813, + "grad_norm": 3.5118401097305174, + "learning_rate": 7.652220341694333e-06, + "loss": 0.72222414, + "memory(GiB)": 33.01, + "step": 460, + "train_speed(iter/s)": 0.191152 + }, + { + "acc": 0.85555248, + "epoch": 0.1540755467196819, + "grad_norm": 5.947399321976254, + "learning_rate": 7.6657131578979e-06, + "loss": 0.74063187, + "memory(GiB)": 33.01, + "step": 465, + "train_speed(iter/s)": 0.19117 + }, + { + "acc": 0.84811583, + "epoch": 0.15573227302849568, + "grad_norm": 5.697052687978797, + "learning_rate": 7.67906166314067e-06, + "loss": 0.77786217, + "memory(GiB)": 33.01, + "step": 470, + "train_speed(iter/s)": 0.191183 + }, + { + "acc": 0.86034212, + "epoch": 0.1573889993373095, + "grad_norm": 4.0454814939907875, + "learning_rate": 7.6922689117093e-06, + "loss": 0.69349651, + "memory(GiB)": 33.01, + "step": 475, + "train_speed(iter/s)": 0.191194 + }, + { + "acc": 0.85979271, + "epoch": 0.15904572564612326, + "grad_norm": 3.7930235007194, + "learning_rate": 7.7053378619407e-06, + "loss": 0.70015211, + "memory(GiB)": 33.01, + "step": 480, + "train_speed(iter/s)": 0.191206 + }, + { + "acc": 0.85328636, + "epoch": 0.16070245195493704, + "grad_norm": 7.599227481118352, + "learning_rate": 7.718271380199416e-06, + "loss": 0.69486294, + "memory(GiB)": 33.01, + "step": 485, + "train_speed(iter/s)": 0.191223 + }, + { + "acc": 0.85639191, + "epoch": 0.16235917826375082, + "grad_norm": 11.596092389773954, + "learning_rate": 7.73107224465102e-06, + "loss": 0.70257854, + "memory(GiB)": 33.01, + "step": 490, + "train_speed(iter/s)": 0.191238 + }, + { + "acc": 0.84910851, + "epoch": 0.16401590457256462, + "grad_norm": 9.20624588132978, + "learning_rate": 7.743743148843951e-06, + "loss": 0.81106377, + "memory(GiB)": 33.01, + "step": 495, + "train_speed(iter/s)": 0.191251 + }, + { + "acc": 0.84651041, + "epoch": 0.1656726308813784, + "grad_norm": 6.889524320452217, + "learning_rate": 7.756286705111353e-06, + "loss": 0.74286199, + "memory(GiB)": 33.01, + "step": 500, + "train_speed(iter/s)": 0.191263 + }, + { + "acc": 0.84956284, + "epoch": 0.16732935719019218, + "grad_norm": 5.966460531077995, + "learning_rate": 7.768705447803669e-06, + "loss": 0.74496927, + "memory(GiB)": 33.01, + "step": 505, + "train_speed(iter/s)": 0.191279 + }, + { + "acc": 0.84233456, + "epoch": 0.16898608349900596, + "grad_norm": 4.027572019536729, + "learning_rate": 7.781001836361969e-06, + "loss": 0.75807991, + "memory(GiB)": 33.01, + "step": 510, + "train_speed(iter/s)": 0.191293 + }, + { + "acc": 0.83378162, + "epoch": 0.17064280980781976, + "grad_norm": 3.5102186121946226, + "learning_rate": 7.79317825824136e-06, + "loss": 0.77981734, + "memory(GiB)": 33.01, + "step": 515, + "train_speed(iter/s)": 0.191308 + }, + { + "acc": 0.84073734, + "epoch": 0.17229953611663354, + "grad_norm": 5.853079987918349, + "learning_rate": 7.805237031693173e-06, + "loss": 0.78628778, + "memory(GiB)": 33.01, + "step": 520, + "train_speed(iter/s)": 0.19132 + }, + { + "acc": 0.84980087, + "epoch": 0.17395626242544732, + "grad_norm": 3.5454605857696357, + "learning_rate": 7.817180408414004e-06, + "loss": 0.76987505, + "memory(GiB)": 33.01, + "step": 525, + "train_speed(iter/s)": 0.191332 + }, + { + "acc": 0.84348345, + "epoch": 0.1756129887342611, + "grad_norm": 4.598681189813219, + "learning_rate": 7.829010576069216e-06, + "loss": 0.73552933, + "memory(GiB)": 33.01, + "step": 530, + "train_speed(iter/s)": 0.191345 + }, + { + "acc": 0.85787067, + "epoch": 0.17726971504307487, + "grad_norm": 4.604010264001306, + "learning_rate": 7.840729660697963e-06, + "loss": 0.71789432, + "memory(GiB)": 33.01, + "step": 535, + "train_speed(iter/s)": 0.191361 + }, + { + "acc": 0.84588737, + "epoch": 0.17892644135188868, + "grad_norm": 4.830877787847248, + "learning_rate": 7.852339729006332e-06, + "loss": 0.74034619, + "memory(GiB)": 33.01, + "step": 540, + "train_speed(iter/s)": 0.191371 + }, + { + "acc": 0.85173817, + "epoch": 0.18058316766070245, + "grad_norm": 2.8699572644791473, + "learning_rate": 7.863842790554826e-06, + "loss": 0.72552185, + "memory(GiB)": 33.01, + "step": 545, + "train_speed(iter/s)": 0.191382 + }, + { + "acc": 0.86452513, + "epoch": 0.18223989396951623, + "grad_norm": 7.246596519208709, + "learning_rate": 7.875240799845943e-06, + "loss": 0.74848604, + "memory(GiB)": 33.01, + "step": 550, + "train_speed(iter/s)": 0.191393 + }, + { + "acc": 0.8572896, + "epoch": 0.18389662027833, + "grad_norm": 3.7520051139701445, + "learning_rate": 7.886535658317292e-06, + "loss": 0.71552725, + "memory(GiB)": 33.01, + "step": 555, + "train_speed(iter/s)": 0.191405 + }, + { + "acc": 0.86681576, + "epoch": 0.1855533465871438, + "grad_norm": 3.5009893983455798, + "learning_rate": 7.897729216245343e-06, + "loss": 0.69532685, + "memory(GiB)": 33.01, + "step": 560, + "train_speed(iter/s)": 0.191418 + }, + { + "acc": 0.85972776, + "epoch": 0.1872100728959576, + "grad_norm": 5.035058722986529, + "learning_rate": 7.908823274564535e-06, + "loss": 0.69615574, + "memory(GiB)": 33.01, + "step": 565, + "train_speed(iter/s)": 0.191428 + }, + { + "acc": 0.84751177, + "epoch": 0.18886679920477137, + "grad_norm": 9.317941270292748, + "learning_rate": 7.919819586606273e-06, + "loss": 0.76375556, + "memory(GiB)": 33.01, + "step": 570, + "train_speed(iter/s)": 0.191435 + }, + { + "acc": 0.85486727, + "epoch": 0.19052352551358515, + "grad_norm": 4.402628274507517, + "learning_rate": 7.930719859761959e-06, + "loss": 0.73269348, + "memory(GiB)": 33.01, + "step": 575, + "train_speed(iter/s)": 0.191444 + }, + { + "acc": 0.847686, + "epoch": 0.19218025182239895, + "grad_norm": 6.094219099288395, + "learning_rate": 7.94152575707405e-06, + "loss": 0.75301404, + "memory(GiB)": 33.01, + "step": 580, + "train_speed(iter/s)": 0.191453 + }, + { + "acc": 0.85120449, + "epoch": 0.19383697813121273, + "grad_norm": 7.83197612062828, + "learning_rate": 7.952238898758805e-06, + "loss": 0.73857994, + "memory(GiB)": 33.01, + "step": 585, + "train_speed(iter/s)": 0.191465 + }, + { + "acc": 0.86423912, + "epoch": 0.1954937044400265, + "grad_norm": 5.655476710354406, + "learning_rate": 7.962860863664248e-06, + "loss": 0.66899776, + "memory(GiB)": 33.01, + "step": 590, + "train_speed(iter/s)": 0.191477 + }, + { + "acc": 0.85700264, + "epoch": 0.19715043074884028, + "grad_norm": 7.116192961530635, + "learning_rate": 7.97339319066661e-06, + "loss": 0.72598734, + "memory(GiB)": 33.01, + "step": 595, + "train_speed(iter/s)": 0.191488 + }, + { + "acc": 0.86150017, + "epoch": 0.1988071570576541, + "grad_norm": 6.748827026936556, + "learning_rate": 7.983837380008324e-06, + "loss": 0.67497487, + "memory(GiB)": 33.01, + "step": 600, + "train_speed(iter/s)": 0.191498 + }, + { + "acc": 0.85967827, + "epoch": 0.20046388336646787, + "grad_norm": 10.998154682065033, + "learning_rate": 7.994194894580531e-06, + "loss": 0.68231859, + "memory(GiB)": 33.01, + "step": 605, + "train_speed(iter/s)": 0.191485 + }, + { + "acc": 0.85635414, + "epoch": 0.20212060967528164, + "grad_norm": 4.5305781096214615, + "learning_rate": 8.00446716115277e-06, + "loss": 0.70385208, + "memory(GiB)": 33.01, + "step": 610, + "train_speed(iter/s)": 0.191497 + }, + { + "acc": 0.86400633, + "epoch": 0.20377733598409542, + "grad_norm": 8.628073552208958, + "learning_rate": 8.014655571552501e-06, + "loss": 0.69599981, + "memory(GiB)": 33.01, + "step": 615, + "train_speed(iter/s)": 0.191488 + }, + { + "acc": 0.85632267, + "epoch": 0.20543406229290923, + "grad_norm": 4.386175166940569, + "learning_rate": 8.024761483796864e-06, + "loss": 0.73796282, + "memory(GiB)": 33.01, + "step": 620, + "train_speed(iter/s)": 0.191498 + }, + { + "acc": 0.85163746, + "epoch": 0.207090788601723, + "grad_norm": 4.350461930883511, + "learning_rate": 8.034786223178978e-06, + "loss": 0.71875296, + "memory(GiB)": 33.01, + "step": 625, + "train_speed(iter/s)": 0.191504 + }, + { + "acc": 0.84534607, + "epoch": 0.20874751491053678, + "grad_norm": 6.498345108224841, + "learning_rate": 8.044731083310975e-06, + "loss": 0.77620149, + "memory(GiB)": 33.01, + "step": 630, + "train_speed(iter/s)": 0.191515 + }, + { + "acc": 0.85141335, + "epoch": 0.21040424121935056, + "grad_norm": 5.992706293797647, + "learning_rate": 8.054597327125787e-06, + "loss": 0.74544926, + "memory(GiB)": 33.01, + "step": 635, + "train_speed(iter/s)": 0.191521 + }, + { + "acc": 0.85580578, + "epoch": 0.21206096752816433, + "grad_norm": 2.7314353914768557, + "learning_rate": 8.064386187839663e-06, + "loss": 0.69488735, + "memory(GiB)": 33.01, + "step": 640, + "train_speed(iter/s)": 0.191531 + }, + { + "acc": 0.86037788, + "epoch": 0.21371769383697814, + "grad_norm": 4.153344973196596, + "learning_rate": 8.074098869877223e-06, + "loss": 0.69857273, + "memory(GiB)": 33.01, + "step": 645, + "train_speed(iter/s)": 0.19154 + }, + { + "acc": 0.85764399, + "epoch": 0.21537442014579192, + "grad_norm": 4.729778970937635, + "learning_rate": 8.083736549760797e-06, + "loss": 0.69933314, + "memory(GiB)": 33.01, + "step": 650, + "train_speed(iter/s)": 0.191548 + }, + { + "acc": 0.86113453, + "epoch": 0.2170311464546057, + "grad_norm": 5.008229766391558, + "learning_rate": 8.093300376965687e-06, + "loss": 0.72642188, + "memory(GiB)": 33.01, + "step": 655, + "train_speed(iter/s)": 0.191556 + }, + { + "acc": 0.85428047, + "epoch": 0.21868787276341947, + "grad_norm": 4.990582646367713, + "learning_rate": 8.102791474742914e-06, + "loss": 0.70296922, + "memory(GiB)": 33.01, + "step": 660, + "train_speed(iter/s)": 0.191562 + }, + { + "acc": 0.8543005, + "epoch": 0.22034459907223328, + "grad_norm": 4.814734543712982, + "learning_rate": 8.112210940910912e-06, + "loss": 0.7326674, + "memory(GiB)": 33.01, + "step": 665, + "train_speed(iter/s)": 0.191571 + }, + { + "acc": 0.87087936, + "epoch": 0.22200132538104705, + "grad_norm": 4.274810521989821, + "learning_rate": 8.121559848617575e-06, + "loss": 0.68122473, + "memory(GiB)": 33.01, + "step": 670, + "train_speed(iter/s)": 0.191583 + }, + { + "acc": 0.86698112, + "epoch": 0.22365805168986083, + "grad_norm": 4.626368782095732, + "learning_rate": 8.130839247073958e-06, + "loss": 0.67305284, + "memory(GiB)": 33.01, + "step": 675, + "train_speed(iter/s)": 0.191592 + }, + { + "acc": 0.8643259, + "epoch": 0.2253147779986746, + "grad_norm": 6.924801505180445, + "learning_rate": 8.140050162260932e-06, + "loss": 0.6990859, + "memory(GiB)": 33.01, + "step": 680, + "train_speed(iter/s)": 0.191601 + }, + { + "acc": 0.86108046, + "epoch": 0.22697150430748841, + "grad_norm": 5.728666374981688, + "learning_rate": 8.149193597609926e-06, + "loss": 0.73946257, + "memory(GiB)": 33.01, + "step": 685, + "train_speed(iter/s)": 0.191606 + }, + { + "acc": 0.86837921, + "epoch": 0.2286282306163022, + "grad_norm": 4.343056660340748, + "learning_rate": 8.15827053465893e-06, + "loss": 0.7034698, + "memory(GiB)": 33.01, + "step": 690, + "train_speed(iter/s)": 0.191613 + }, + { + "acc": 0.86654921, + "epoch": 0.23028495692511597, + "grad_norm": 5.829114137408897, + "learning_rate": 8.167281933684813e-06, + "loss": 0.67937307, + "memory(GiB)": 33.01, + "step": 695, + "train_speed(iter/s)": 0.191621 + }, + { + "acc": 0.86075773, + "epoch": 0.23194168323392975, + "grad_norm": 4.078790318510965, + "learning_rate": 8.176228734312967e-06, + "loss": 0.68860335, + "memory(GiB)": 33.01, + "step": 700, + "train_speed(iter/s)": 0.191629 + }, + { + "acc": 0.84542274, + "epoch": 0.23359840954274355, + "grad_norm": 10.29612897068814, + "learning_rate": 8.185111856105267e-06, + "loss": 0.76998425, + "memory(GiB)": 33.01, + "step": 705, + "train_speed(iter/s)": 0.191637 + }, + { + "acc": 0.85347729, + "epoch": 0.23525513585155733, + "grad_norm": 5.772386392955135, + "learning_rate": 8.19393219912725e-06, + "loss": 0.73821373, + "memory(GiB)": 33.01, + "step": 710, + "train_speed(iter/s)": 0.191648 + }, + { + "acc": 0.86139927, + "epoch": 0.2369118621603711, + "grad_norm": 9.905356064900213, + "learning_rate": 8.202690644495385e-06, + "loss": 0.74055252, + "memory(GiB)": 33.01, + "step": 715, + "train_speed(iter/s)": 0.191653 + }, + { + "acc": 0.85751944, + "epoch": 0.23856858846918488, + "grad_norm": 3.526084318998935, + "learning_rate": 8.211388054905297e-06, + "loss": 0.68814349, + "memory(GiB)": 33.01, + "step": 720, + "train_speed(iter/s)": 0.191662 + }, + { + "acc": 0.85765324, + "epoch": 0.24022531477799866, + "grad_norm": 4.954944455850853, + "learning_rate": 8.220025275141674e-06, + "loss": 0.65789051, + "memory(GiB)": 33.01, + "step": 725, + "train_speed(iter/s)": 0.19167 + }, + { + "acc": 0.8511692, + "epoch": 0.24188204108681247, + "grad_norm": 4.155498568727979, + "learning_rate": 8.228603132570695e-06, + "loss": 0.73796372, + "memory(GiB)": 33.01, + "step": 730, + "train_speed(iter/s)": 0.191676 + }, + { + "acc": 0.86511383, + "epoch": 0.24353876739562624, + "grad_norm": 4.69163212202949, + "learning_rate": 8.237122437615616e-06, + "loss": 0.69170351, + "memory(GiB)": 33.01, + "step": 735, + "train_speed(iter/s)": 0.191682 + }, + { + "acc": 0.86455679, + "epoch": 0.24519549370444002, + "grad_norm": 6.091645099912835, + "learning_rate": 8.245583984216255e-06, + "loss": 0.66270704, + "memory(GiB)": 33.01, + "step": 740, + "train_speed(iter/s)": 0.191691 + }, + { + "acc": 0.86024208, + "epoch": 0.2468522200132538, + "grad_norm": 4.783219472099627, + "learning_rate": 8.253988550272997e-06, + "loss": 0.71306019, + "memory(GiB)": 33.01, + "step": 745, + "train_speed(iter/s)": 0.1917 + }, + { + "acc": 0.86008968, + "epoch": 0.2485089463220676, + "grad_norm": 8.073864065552426, + "learning_rate": 8.26233689807595e-06, + "loss": 0.7465075, + "memory(GiB)": 33.01, + "step": 750, + "train_speed(iter/s)": 0.191707 + }, + { + "acc": 0.86828327, + "epoch": 0.25016567263088135, + "grad_norm": 3.2745862282999, + "learning_rate": 8.270629774719843e-06, + "loss": 0.64948173, + "memory(GiB)": 33.01, + "step": 755, + "train_speed(iter/s)": 0.191715 + }, + { + "acc": 0.86992846, + "epoch": 0.25182239893969516, + "grad_norm": 4.671054926219876, + "learning_rate": 8.278867912505236e-06, + "loss": 0.7060751, + "memory(GiB)": 33.01, + "step": 760, + "train_speed(iter/s)": 0.191724 + }, + { + "acc": 0.86491318, + "epoch": 0.25347912524850896, + "grad_norm": 4.807854418287031, + "learning_rate": 8.287052029326564e-06, + "loss": 0.69029131, + "memory(GiB)": 33.01, + "step": 765, + "train_speed(iter/s)": 0.191733 + }, + { + "acc": 0.8653554, + "epoch": 0.2551358515573227, + "grad_norm": 7.960100790874098, + "learning_rate": 8.295182829047555e-06, + "loss": 0.75642958, + "memory(GiB)": 33.01, + "step": 770, + "train_speed(iter/s)": 0.19174 + }, + { + "acc": 0.87609472, + "epoch": 0.2567925778661365, + "grad_norm": 3.7332405404403177, + "learning_rate": 8.30326100186449e-06, + "loss": 0.63245564, + "memory(GiB)": 33.01, + "step": 775, + "train_speed(iter/s)": 0.191746 + }, + { + "acc": 0.86726351, + "epoch": 0.2584493041749503, + "grad_norm": 6.592949946816494, + "learning_rate": 8.31128722465777e-06, + "loss": 0.70761013, + "memory(GiB)": 33.01, + "step": 780, + "train_speed(iter/s)": 0.191751 + }, + { + "acc": 0.85083599, + "epoch": 0.2601060304837641, + "grad_norm": 7.117368440138149, + "learning_rate": 8.319262161332283e-06, + "loss": 0.79634342, + "memory(GiB)": 33.01, + "step": 785, + "train_speed(iter/s)": 0.191757 + }, + { + "acc": 0.85279007, + "epoch": 0.2617627567925779, + "grad_norm": 5.0439827198012415, + "learning_rate": 8.32718646314694e-06, + "loss": 0.75035219, + "memory(GiB)": 33.01, + "step": 790, + "train_speed(iter/s)": 0.191765 + }, + { + "acc": 0.85604877, + "epoch": 0.2634194831013916, + "grad_norm": 3.5927726013912524, + "learning_rate": 8.335060769033813e-06, + "loss": 0.68453722, + "memory(GiB)": 33.01, + "step": 795, + "train_speed(iter/s)": 0.191771 + }, + { + "acc": 0.8636591, + "epoch": 0.26507620941020543, + "grad_norm": 3.8977563793690533, + "learning_rate": 8.342885705907289e-06, + "loss": 0.71641669, + "memory(GiB)": 33.01, + "step": 800, + "train_speed(iter/s)": 0.191778 + }, + { + "acc": 0.8587285, + "epoch": 0.26673293571901924, + "grad_norm": 3.5022622627110644, + "learning_rate": 8.350661888963573e-06, + "loss": 0.70699081, + "memory(GiB)": 33.01, + "step": 805, + "train_speed(iter/s)": 0.191784 + }, + { + "acc": 0.86881943, + "epoch": 0.268389662027833, + "grad_norm": 5.7848711558388155, + "learning_rate": 8.358389921970929e-06, + "loss": 0.67217498, + "memory(GiB)": 33.01, + "step": 810, + "train_speed(iter/s)": 0.191789 + }, + { + "acc": 0.86003571, + "epoch": 0.2700463883366468, + "grad_norm": 5.597400102240786, + "learning_rate": 8.366070397550993e-06, + "loss": 0.68687806, + "memory(GiB)": 33.01, + "step": 815, + "train_speed(iter/s)": 0.191794 + }, + { + "acc": 0.85528393, + "epoch": 0.2717031146454606, + "grad_norm": 5.648428195440093, + "learning_rate": 8.373703897451464e-06, + "loss": 0.70997171, + "memory(GiB)": 33.01, + "step": 820, + "train_speed(iter/s)": 0.191799 + }, + { + "acc": 0.86729469, + "epoch": 0.27335984095427435, + "grad_norm": 12.497625345993981, + "learning_rate": 8.38129099281054e-06, + "loss": 0.6821249, + "memory(GiB)": 33.01, + "step": 825, + "train_speed(iter/s)": 0.191803 + }, + { + "acc": 0.8640686, + "epoch": 0.27501656726308815, + "grad_norm": 3.8320240234117833, + "learning_rate": 8.38883224441332e-06, + "loss": 0.66615601, + "memory(GiB)": 33.01, + "step": 830, + "train_speed(iter/s)": 0.191811 + }, + { + "acc": 0.86246319, + "epoch": 0.2766732935719019, + "grad_norm": 5.522117887322051, + "learning_rate": 8.396328202940556e-06, + "loss": 0.70188322, + "memory(GiB)": 33.01, + "step": 835, + "train_speed(iter/s)": 0.191789 + }, + { + "acc": 0.86381741, + "epoch": 0.2783300198807157, + "grad_norm": 4.87827516864088, + "learning_rate": 8.403779409209938e-06, + "loss": 0.70778923, + "memory(GiB)": 33.01, + "step": 840, + "train_speed(iter/s)": 0.191797 + }, + { + "acc": 0.86391449, + "epoch": 0.2799867461895295, + "grad_norm": 6.395734537229641, + "learning_rate": 8.41118639441024e-06, + "loss": 0.68261509, + "memory(GiB)": 33.01, + "step": 845, + "train_speed(iter/s)": 0.191789 + }, + { + "acc": 0.85789928, + "epoch": 0.28164347249834326, + "grad_norm": 4.5005658378847775, + "learning_rate": 8.418549680328555e-06, + "loss": 0.71203232, + "memory(GiB)": 33.01, + "step": 850, + "train_speed(iter/s)": 0.191793 + }, + { + "acc": 0.84966297, + "epoch": 0.28330019880715707, + "grad_norm": 6.821844315190172, + "learning_rate": 8.425869779570868e-06, + "loss": 0.73459692, + "memory(GiB)": 33.01, + "step": 855, + "train_speed(iter/s)": 0.191797 + }, + { + "acc": 0.8597559, + "epoch": 0.2849569251159708, + "grad_norm": 8.223348678276244, + "learning_rate": 8.433147195776186e-06, + "loss": 0.70681105, + "memory(GiB)": 33.01, + "step": 860, + "train_speed(iter/s)": 0.191804 + }, + { + "acc": 0.86295443, + "epoch": 0.2866136514247846, + "grad_norm": 4.871278284312656, + "learning_rate": 8.440382423824487e-06, + "loss": 0.69193487, + "memory(GiB)": 33.01, + "step": 865, + "train_speed(iter/s)": 0.19181 + }, + { + "acc": 0.86210175, + "epoch": 0.2882703777335984, + "grad_norm": 6.510180490620472, + "learning_rate": 8.447575950038647e-06, + "loss": 0.71838708, + "memory(GiB)": 33.01, + "step": 870, + "train_speed(iter/s)": 0.191816 + }, + { + "acc": 0.86748362, + "epoch": 0.2899271040424122, + "grad_norm": 3.9864645425046445, + "learning_rate": 8.45472825238059e-06, + "loss": 0.70167303, + "memory(GiB)": 33.01, + "step": 875, + "train_speed(iter/s)": 0.191821 + }, + { + "acc": 0.84462318, + "epoch": 0.291583830351226, + "grad_norm": 8.047064350711658, + "learning_rate": 8.461839800641877e-06, + "loss": 0.78728943, + "memory(GiB)": 33.01, + "step": 880, + "train_speed(iter/s)": 0.191829 + }, + { + "acc": 0.85642376, + "epoch": 0.2932405566600398, + "grad_norm": 7.117747216483004, + "learning_rate": 8.468911056628845e-06, + "loss": 0.72243757, + "memory(GiB)": 33.01, + "step": 885, + "train_speed(iter/s)": 0.191835 + }, + { + "acc": 0.85076056, + "epoch": 0.29489728296885354, + "grad_norm": 7.759588962424344, + "learning_rate": 8.475942474342575e-06, + "loss": 0.72990608, + "memory(GiB)": 33.01, + "step": 890, + "train_speed(iter/s)": 0.191839 + }, + { + "acc": 0.86660328, + "epoch": 0.29655400927766734, + "grad_norm": 8.99467532941417, + "learning_rate": 8.482934500153795e-06, + "loss": 0.71121721, + "memory(GiB)": 33.01, + "step": 895, + "train_speed(iter/s)": 0.191845 + }, + { + "acc": 0.86047487, + "epoch": 0.2982107355864811, + "grad_norm": 4.933016923281054, + "learning_rate": 8.48988757297292e-06, + "loss": 0.69261165, + "memory(GiB)": 33.01, + "step": 900, + "train_speed(iter/s)": 0.191852 + }, + { + "acc": 0.85310354, + "epoch": 0.2998674618952949, + "grad_norm": 4.166546187502097, + "learning_rate": 8.496802124415386e-06, + "loss": 0.76197157, + "memory(GiB)": 33.01, + "step": 905, + "train_speed(iter/s)": 0.191858 + }, + { + "acc": 0.8576828, + "epoch": 0.3015241882041087, + "grad_norm": 5.2864064643943545, + "learning_rate": 8.50367857896241e-06, + "loss": 0.71239004, + "memory(GiB)": 33.01, + "step": 910, + "train_speed(iter/s)": 0.191864 + }, + { + "acc": 0.84969053, + "epoch": 0.30318091451292245, + "grad_norm": 3.9445977679759054, + "learning_rate": 8.510517354117366e-06, + "loss": 0.72859564, + "memory(GiB)": 33.01, + "step": 915, + "train_speed(iter/s)": 0.191868 + }, + { + "acc": 0.86889191, + "epoch": 0.30483764082173626, + "grad_norm": 5.859753689117367, + "learning_rate": 8.517318860557895e-06, + "loss": 0.69389181, + "memory(GiB)": 33.01, + "step": 920, + "train_speed(iter/s)": 0.191873 + }, + { + "acc": 0.85660534, + "epoch": 0.30649436713055006, + "grad_norm": 7.020475009305861, + "learning_rate": 8.52408350228388e-06, + "loss": 0.71614857, + "memory(GiB)": 33.01, + "step": 925, + "train_speed(iter/s)": 0.191878 + }, + { + "acc": 0.84998512, + "epoch": 0.3081510934393638, + "grad_norm": 5.157060283328341, + "learning_rate": 8.530811676761459e-06, + "loss": 0.73749046, + "memory(GiB)": 33.01, + "step": 930, + "train_speed(iter/s)": 0.191882 + }, + { + "acc": 0.84837933, + "epoch": 0.3098078197481776, + "grad_norm": 4.68265820623965, + "learning_rate": 8.537503775063145e-06, + "loss": 0.74418221, + "memory(GiB)": 33.01, + "step": 935, + "train_speed(iter/s)": 0.191887 + }, + { + "acc": 0.85448933, + "epoch": 0.31146454605699136, + "grad_norm": 4.407864788888527, + "learning_rate": 8.54416018200423e-06, + "loss": 0.71617479, + "memory(GiB)": 33.01, + "step": 940, + "train_speed(iter/s)": 0.191892 + }, + { + "acc": 0.86693363, + "epoch": 0.31312127236580517, + "grad_norm": 7.0917965746171046, + "learning_rate": 8.550781276275572e-06, + "loss": 0.68734121, + "memory(GiB)": 33.01, + "step": 945, + "train_speed(iter/s)": 0.191898 + }, + { + "acc": 0.85768795, + "epoch": 0.314777998674619, + "grad_norm": 5.484400495408929, + "learning_rate": 8.55736743057286e-06, + "loss": 0.7659287, + "memory(GiB)": 33.01, + "step": 950, + "train_speed(iter/s)": 0.191904 + }, + { + "acc": 0.87186584, + "epoch": 0.3164347249834327, + "grad_norm": 4.2330702758674565, + "learning_rate": 8.56391901172251e-06, + "loss": 0.66473894, + "memory(GiB)": 33.01, + "step": 955, + "train_speed(iter/s)": 0.19191 + }, + { + "acc": 0.85993299, + "epoch": 0.31809145129224653, + "grad_norm": 6.565656450893978, + "learning_rate": 8.57043638080426e-06, + "loss": 0.71878262, + "memory(GiB)": 33.01, + "step": 960, + "train_speed(iter/s)": 0.191913 + }, + { + "acc": 0.86838036, + "epoch": 0.3197481776010603, + "grad_norm": 5.553837202597705, + "learning_rate": 8.576919893270573e-06, + "loss": 0.71313562, + "memory(GiB)": 33.01, + "step": 965, + "train_speed(iter/s)": 0.191918 + }, + { + "acc": 0.85277061, + "epoch": 0.3214049039098741, + "grad_norm": 8.886485634732379, + "learning_rate": 8.583369899062976e-06, + "loss": 0.68710203, + "memory(GiB)": 33.01, + "step": 970, + "train_speed(iter/s)": 0.191921 + }, + { + "acc": 0.85232639, + "epoch": 0.3230616302186879, + "grad_norm": 7.470695382757026, + "learning_rate": 8.589786742725394e-06, + "loss": 0.70246749, + "memory(GiB)": 33.01, + "step": 975, + "train_speed(iter/s)": 0.191925 + }, + { + "acc": 0.85501528, + "epoch": 0.32471835652750164, + "grad_norm": 6.6965708100001615, + "learning_rate": 8.59617076351458e-06, + "loss": 0.73256102, + "memory(GiB)": 33.01, + "step": 980, + "train_speed(iter/s)": 0.191928 + }, + { + "acc": 0.85887146, + "epoch": 0.32637508283631544, + "grad_norm": 4.528870100887346, + "learning_rate": 8.602522295507758e-06, + "loss": 0.69201508, + "memory(GiB)": 33.01, + "step": 985, + "train_speed(iter/s)": 0.191933 + }, + { + "acc": 0.87829342, + "epoch": 0.32803180914512925, + "grad_norm": 4.286242120110182, + "learning_rate": 8.60884166770751e-06, + "loss": 0.64423809, + "memory(GiB)": 33.01, + "step": 990, + "train_speed(iter/s)": 0.191934 + }, + { + "acc": 0.87213535, + "epoch": 0.329688535453943, + "grad_norm": 4.327022130858467, + "learning_rate": 8.61512920414405e-06, + "loss": 0.68861742, + "memory(GiB)": 33.01, + "step": 995, + "train_speed(iter/s)": 0.191939 + }, + { + "acc": 0.86555452, + "epoch": 0.3313452617627568, + "grad_norm": 7.30777548783786, + "learning_rate": 8.621385223974913e-06, + "loss": 0.68824911, + "memory(GiB)": 33.01, + "step": 1000, + "train_speed(iter/s)": 0.191945 + }, + { + "acc": 0.84967728, + "epoch": 0.33300198807157055, + "grad_norm": 4.126697856691726, + "learning_rate": 8.627610041582171e-06, + "loss": 0.77748609, + "memory(GiB)": 33.01, + "step": 1005, + "train_speed(iter/s)": 0.19195 + }, + { + "acc": 0.84659004, + "epoch": 0.33465871438038436, + "grad_norm": 6.212526850023021, + "learning_rate": 8.633803966667229e-06, + "loss": 0.76965971, + "memory(GiB)": 33.01, + "step": 1010, + "train_speed(iter/s)": 0.191953 + }, + { + "acc": 0.84827509, + "epoch": 0.33631544068919816, + "grad_norm": 7.927500563444142, + "learning_rate": 8.639967304343287e-06, + "loss": 0.74794369, + "memory(GiB)": 33.01, + "step": 1015, + "train_speed(iter/s)": 0.191958 + }, + { + "acc": 0.85209866, + "epoch": 0.3379721669980119, + "grad_norm": 4.986422162591401, + "learning_rate": 8.646100355225527e-06, + "loss": 0.7509851, + "memory(GiB)": 33.01, + "step": 1020, + "train_speed(iter/s)": 0.191963 + }, + { + "acc": 0.86078835, + "epoch": 0.3396288933068257, + "grad_norm": 3.8140029467261494, + "learning_rate": 8.65220341551909e-06, + "loss": 0.70069637, + "memory(GiB)": 33.01, + "step": 1025, + "train_speed(iter/s)": 0.191969 + }, + { + "acc": 0.8566391, + "epoch": 0.3412856196156395, + "grad_norm": 2.553467166890378, + "learning_rate": 8.65827677710492e-06, + "loss": 0.74217367, + "memory(GiB)": 33.01, + "step": 1030, + "train_speed(iter/s)": 0.191972 + }, + { + "acc": 0.86919012, + "epoch": 0.3429423459244533, + "grad_norm": 2.7168711420548965, + "learning_rate": 8.664320727623527e-06, + "loss": 0.69090486, + "memory(GiB)": 33.01, + "step": 1035, + "train_speed(iter/s)": 0.191976 + }, + { + "acc": 0.86263781, + "epoch": 0.3445990722332671, + "grad_norm": 4.255977142857873, + "learning_rate": 8.670335550556733e-06, + "loss": 0.67670717, + "memory(GiB)": 33.01, + "step": 1040, + "train_speed(iter/s)": 0.191978 + }, + { + "acc": 0.86686478, + "epoch": 0.34625579854208083, + "grad_norm": 5.9461619636194, + "learning_rate": 8.676321525307448e-06, + "loss": 0.71811056, + "memory(GiB)": 33.01, + "step": 1045, + "train_speed(iter/s)": 0.191982 + }, + { + "acc": 0.86780119, + "epoch": 0.34791252485089463, + "grad_norm": 4.411280283281894, + "learning_rate": 8.682278927277564e-06, + "loss": 0.68573217, + "memory(GiB)": 33.01, + "step": 1050, + "train_speed(iter/s)": 0.191986 + }, + { + "acc": 0.86082306, + "epoch": 0.34956925115970844, + "grad_norm": 4.277181873250783, + "learning_rate": 8.688208027943965e-06, + "loss": 0.73803396, + "memory(GiB)": 33.01, + "step": 1055, + "train_speed(iter/s)": 0.19199 + }, + { + "acc": 0.85872288, + "epoch": 0.3512259774685222, + "grad_norm": 9.513126878642865, + "learning_rate": 8.694109094932776e-06, + "loss": 0.6754879, + "memory(GiB)": 33.01, + "step": 1060, + "train_speed(iter/s)": 0.19198 + }, + { + "acc": 0.87298717, + "epoch": 0.352882703777336, + "grad_norm": 6.519096238018569, + "learning_rate": 8.699982392091845e-06, + "loss": 0.64740787, + "memory(GiB)": 33.01, + "step": 1065, + "train_speed(iter/s)": 0.191983 + }, + { + "acc": 0.86751633, + "epoch": 0.35453943008614974, + "grad_norm": 5.5293755748294275, + "learning_rate": 8.705828179561522e-06, + "loss": 0.69447689, + "memory(GiB)": 33.01, + "step": 1070, + "train_speed(iter/s)": 0.191975 + }, + { + "acc": 0.8515645, + "epoch": 0.35619615639496355, + "grad_norm": 7.815726388251606, + "learning_rate": 8.711646713843812e-06, + "loss": 0.73291807, + "memory(GiB)": 33.01, + "step": 1075, + "train_speed(iter/s)": 0.191979 + }, + { + "acc": 0.85659885, + "epoch": 0.35785288270377735, + "grad_norm": 4.736123045402512, + "learning_rate": 8.717438247869894e-06, + "loss": 0.73431463, + "memory(GiB)": 33.01, + "step": 1080, + "train_speed(iter/s)": 0.191984 + }, + { + "acc": 0.86045341, + "epoch": 0.3595096090125911, + "grad_norm": 4.962469685077272, + "learning_rate": 8.723203031066102e-06, + "loss": 0.72274265, + "memory(GiB)": 33.01, + "step": 1085, + "train_speed(iter/s)": 0.191988 + }, + { + "acc": 0.86184435, + "epoch": 0.3611663353214049, + "grad_norm": 30.571469764149068, + "learning_rate": 8.728941309418385e-06, + "loss": 0.70956249, + "memory(GiB)": 33.01, + "step": 1090, + "train_speed(iter/s)": 0.191991 + }, + { + "acc": 0.8445303, + "epoch": 0.3628230616302187, + "grad_norm": 3.1331808301036417, + "learning_rate": 8.734653325535291e-06, + "loss": 0.75802627, + "memory(GiB)": 33.01, + "step": 1095, + "train_speed(iter/s)": 0.191993 + }, + { + "acc": 0.86017399, + "epoch": 0.36447978793903246, + "grad_norm": 6.255926580668658, + "learning_rate": 8.740339318709502e-06, + "loss": 0.70160847, + "memory(GiB)": 33.01, + "step": 1100, + "train_speed(iter/s)": 0.191998 + }, + { + "acc": 0.84508801, + "epoch": 0.36613651424784627, + "grad_norm": 9.187962899454137, + "learning_rate": 8.745999524978e-06, + "loss": 0.7189342, + "memory(GiB)": 33.01, + "step": 1105, + "train_speed(iter/s)": 0.192002 + }, + { + "acc": 0.87337513, + "epoch": 0.36779324055666, + "grad_norm": 3.6099000748388286, + "learning_rate": 8.751634177180852e-06, + "loss": 0.66577797, + "memory(GiB)": 33.01, + "step": 1110, + "train_speed(iter/s)": 0.192007 + }, + { + "acc": 0.85580883, + "epoch": 0.3694499668654738, + "grad_norm": 8.80790263065911, + "learning_rate": 8.757243505018693e-06, + "loss": 0.70440307, + "memory(GiB)": 33.01, + "step": 1115, + "train_speed(iter/s)": 0.19201 + }, + { + "acc": 0.84594364, + "epoch": 0.3711066931742876, + "grad_norm": 7.509362597486998, + "learning_rate": 8.762827735108903e-06, + "loss": 0.81703548, + "memory(GiB)": 33.01, + "step": 1120, + "train_speed(iter/s)": 0.192013 + }, + { + "acc": 0.86294937, + "epoch": 0.3727634194831014, + "grad_norm": 3.0748196059001103, + "learning_rate": 8.768387091040546e-06, + "loss": 0.7328989, + "memory(GiB)": 33.01, + "step": 1125, + "train_speed(iter/s)": 0.192018 + }, + { + "acc": 0.86343956, + "epoch": 0.3744201457919152, + "grad_norm": 3.956163390393462, + "learning_rate": 8.773921793428094e-06, + "loss": 0.72786388, + "memory(GiB)": 33.01, + "step": 1130, + "train_speed(iter/s)": 0.192022 + }, + { + "acc": 0.85795031, + "epoch": 0.376076872100729, + "grad_norm": 4.431043841633779, + "learning_rate": 8.779432059963951e-06, + "loss": 0.6969532, + "memory(GiB)": 33.01, + "step": 1135, + "train_speed(iter/s)": 0.192023 + }, + { + "acc": 0.8584589, + "epoch": 0.37773359840954274, + "grad_norm": 5.089581286774482, + "learning_rate": 8.784918105469833e-06, + "loss": 0.71613669, + "memory(GiB)": 33.01, + "step": 1140, + "train_speed(iter/s)": 0.192026 + }, + { + "acc": 0.86626835, + "epoch": 0.37939032471835654, + "grad_norm": 6.792031152265121, + "learning_rate": 8.79038014194702e-06, + "loss": 0.6801815, + "memory(GiB)": 33.01, + "step": 1145, + "train_speed(iter/s)": 0.192028 + }, + { + "acc": 0.86356316, + "epoch": 0.3810470510271703, + "grad_norm": 3.8554059385913884, + "learning_rate": 8.795818378625519e-06, + "loss": 0.6736372, + "memory(GiB)": 33.01, + "step": 1150, + "train_speed(iter/s)": 0.19203 + }, + { + "acc": 0.86050854, + "epoch": 0.3827037773359841, + "grad_norm": 3.2608560334191634, + "learning_rate": 8.801233022012152e-06, + "loss": 0.69969139, + "memory(GiB)": 33.01, + "step": 1155, + "train_speed(iter/s)": 0.192034 + }, + { + "acc": 0.85761337, + "epoch": 0.3843605036447979, + "grad_norm": 4.699570624072087, + "learning_rate": 8.80662427593761e-06, + "loss": 0.67253723, + "memory(GiB)": 33.01, + "step": 1160, + "train_speed(iter/s)": 0.192037 + }, + { + "acc": 0.8686121, + "epoch": 0.38601722995361165, + "grad_norm": 7.116748808234933, + "learning_rate": 8.811992341602492e-06, + "loss": 0.65382185, + "memory(GiB)": 33.01, + "step": 1165, + "train_speed(iter/s)": 0.192039 + }, + { + "acc": 0.87032061, + "epoch": 0.38767395626242546, + "grad_norm": 5.413912217697706, + "learning_rate": 8.817337417622365e-06, + "loss": 0.63192997, + "memory(GiB)": 33.01, + "step": 1170, + "train_speed(iter/s)": 0.192043 + }, + { + "acc": 0.86420956, + "epoch": 0.3893306825712392, + "grad_norm": 4.546166827237802, + "learning_rate": 8.822659700071856e-06, + "loss": 0.66633849, + "memory(GiB)": 33.01, + "step": 1175, + "train_speed(iter/s)": 0.192047 + }, + { + "acc": 0.86204653, + "epoch": 0.390987408880053, + "grad_norm": 6.9094640693737, + "learning_rate": 8.82795938252781e-06, + "loss": 0.69639988, + "memory(GiB)": 33.01, + "step": 1180, + "train_speed(iter/s)": 0.192051 + }, + { + "acc": 0.85785131, + "epoch": 0.3926441351888668, + "grad_norm": 5.6236949781970855, + "learning_rate": 8.833236656111535e-06, + "loss": 0.67771921, + "memory(GiB)": 33.01, + "step": 1185, + "train_speed(iter/s)": 0.192054 + }, + { + "acc": 0.86071796, + "epoch": 0.39430086149768057, + "grad_norm": 6.366038395999916, + "learning_rate": 8.83849170953017e-06, + "loss": 0.66028328, + "memory(GiB)": 33.01, + "step": 1190, + "train_speed(iter/s)": 0.192059 + }, + { + "acc": 0.87674227, + "epoch": 0.39595758780649437, + "grad_norm": 3.308309235543562, + "learning_rate": 8.843724729117152e-06, + "loss": 0.64831953, + "memory(GiB)": 33.01, + "step": 1195, + "train_speed(iter/s)": 0.192063 + }, + { + "acc": 0.86043034, + "epoch": 0.3976143141153082, + "grad_norm": 8.783944646097877, + "learning_rate": 8.848935898871885e-06, + "loss": 0.66111774, + "memory(GiB)": 33.01, + "step": 1200, + "train_speed(iter/s)": 0.192065 + }, + { + "acc": 0.85789833, + "epoch": 0.3992710404241219, + "grad_norm": 8.268745115458769, + "learning_rate": 8.854125400498542e-06, + "loss": 0.7165966, + "memory(GiB)": 33.01, + "step": 1205, + "train_speed(iter/s)": 0.192065 + }, + { + "acc": 0.85992565, + "epoch": 0.40092776673293573, + "grad_norm": 4.890343178737042, + "learning_rate": 8.85929341344409e-06, + "loss": 0.69850774, + "memory(GiB)": 33.01, + "step": 1210, + "train_speed(iter/s)": 0.192068 + }, + { + "acc": 0.86116114, + "epoch": 0.4025844930417495, + "grad_norm": 3.4696663332007764, + "learning_rate": 8.864440114935526e-06, + "loss": 0.67585459, + "memory(GiB)": 33.01, + "step": 1215, + "train_speed(iter/s)": 0.192071 + }, + { + "acc": 0.86599884, + "epoch": 0.4042412193505633, + "grad_norm": 3.400335100949336, + "learning_rate": 8.86956568001633e-06, + "loss": 0.65663977, + "memory(GiB)": 33.01, + "step": 1220, + "train_speed(iter/s)": 0.192074 + }, + { + "acc": 0.85257549, + "epoch": 0.4058979456593771, + "grad_norm": 4.291480298842552, + "learning_rate": 8.874670281582205e-06, + "loss": 0.70670052, + "memory(GiB)": 33.01, + "step": 1225, + "train_speed(iter/s)": 0.192078 + }, + { + "acc": 0.87359371, + "epoch": 0.40755467196819084, + "grad_norm": 6.877937879926714, + "learning_rate": 8.879754090416061e-06, + "loss": 0.64880548, + "memory(GiB)": 33.01, + "step": 1230, + "train_speed(iter/s)": 0.192079 + }, + { + "acc": 0.86227837, + "epoch": 0.40921139827700465, + "grad_norm": 5.624219130124923, + "learning_rate": 8.884817275222304e-06, + "loss": 0.68625078, + "memory(GiB)": 33.01, + "step": 1235, + "train_speed(iter/s)": 0.192083 + }, + { + "acc": 0.86328125, + "epoch": 0.41086812458581845, + "grad_norm": 5.297235302637378, + "learning_rate": 8.889860002660424e-06, + "loss": 0.68548312, + "memory(GiB)": 33.01, + "step": 1240, + "train_speed(iter/s)": 0.192086 + }, + { + "acc": 0.8626215, + "epoch": 0.4125248508946322, + "grad_norm": 6.606333534535208, + "learning_rate": 8.894882437377916e-06, + "loss": 0.68742762, + "memory(GiB)": 33.01, + "step": 1245, + "train_speed(iter/s)": 0.192089 + }, + { + "acc": 0.85593128, + "epoch": 0.414181577203446, + "grad_norm": 5.780530438267892, + "learning_rate": 8.899884742042536e-06, + "loss": 0.72823448, + "memory(GiB)": 33.01, + "step": 1250, + "train_speed(iter/s)": 0.192092 + }, + { + "acc": 0.84919424, + "epoch": 0.41583830351225975, + "grad_norm": 5.168353177298467, + "learning_rate": 8.904867077373915e-06, + "loss": 0.73082142, + "memory(GiB)": 33.01, + "step": 1255, + "train_speed(iter/s)": 0.192095 + }, + { + "acc": 0.86038704, + "epoch": 0.41749502982107356, + "grad_norm": 4.612606387706837, + "learning_rate": 8.909829602174535e-06, + "loss": 0.69704094, + "memory(GiB)": 33.01, + "step": 1260, + "train_speed(iter/s)": 0.192098 + }, + { + "acc": 0.86871624, + "epoch": 0.41915175612988737, + "grad_norm": 3.3570483389467007, + "learning_rate": 8.914772473360109e-06, + "loss": 0.70701017, + "memory(GiB)": 33.01, + "step": 1265, + "train_speed(iter/s)": 0.192102 + }, + { + "acc": 0.86309032, + "epoch": 0.4208084824387011, + "grad_norm": 3.2628703081995862, + "learning_rate": 8.919695845989347e-06, + "loss": 0.68439708, + "memory(GiB)": 33.01, + "step": 1270, + "train_speed(iter/s)": 0.192104 + }, + { + "acc": 0.87284012, + "epoch": 0.4224652087475149, + "grad_norm": 3.4484603804666825, + "learning_rate": 8.924599873293152e-06, + "loss": 0.65062599, + "memory(GiB)": 33.01, + "step": 1275, + "train_speed(iter/s)": 0.192106 + }, + { + "acc": 0.86791553, + "epoch": 0.42412193505632867, + "grad_norm": 8.973513100825011, + "learning_rate": 8.929484706703225e-06, + "loss": 0.67699113, + "memory(GiB)": 33.01, + "step": 1280, + "train_speed(iter/s)": 0.192109 + }, + { + "acc": 0.87207718, + "epoch": 0.4257786613651425, + "grad_norm": 6.517508560105469, + "learning_rate": 8.934350495880123e-06, + "loss": 0.64380426, + "memory(GiB)": 33.01, + "step": 1285, + "train_speed(iter/s)": 0.192111 + }, + { + "acc": 0.866152, + "epoch": 0.4274353876739563, + "grad_norm": 8.404196643915192, + "learning_rate": 8.939197388740783e-06, + "loss": 0.67536287, + "memory(GiB)": 33.01, + "step": 1290, + "train_speed(iter/s)": 0.192094 + }, + { + "acc": 0.87534895, + "epoch": 0.42909211398277003, + "grad_norm": 3.2016932440015915, + "learning_rate": 8.944025531485495e-06, + "loss": 0.63085623, + "memory(GiB)": 33.01, + "step": 1295, + "train_speed(iter/s)": 0.192098 + }, + { + "acc": 0.87227936, + "epoch": 0.43074884029158383, + "grad_norm": 3.8717486104123005, + "learning_rate": 8.948835068624357e-06, + "loss": 0.61535788, + "memory(GiB)": 33.01, + "step": 1300, + "train_speed(iter/s)": 0.192092 + }, + { + "acc": 0.86590223, + "epoch": 0.43240556660039764, + "grad_norm": 4.708375337715233, + "learning_rate": 8.953626143003244e-06, + "loss": 0.64982243, + "memory(GiB)": 33.01, + "step": 1305, + "train_speed(iter/s)": 0.192094 + }, + { + "acc": 0.879953, + "epoch": 0.4340622929092114, + "grad_norm": 3.8588317689639102, + "learning_rate": 8.958398895829247e-06, + "loss": 0.65139589, + "memory(GiB)": 33.01, + "step": 1310, + "train_speed(iter/s)": 0.192096 + }, + { + "acc": 0.87267056, + "epoch": 0.4357190192180252, + "grad_norm": 11.343379083443592, + "learning_rate": 8.963153466695663e-06, + "loss": 0.67646942, + "memory(GiB)": 33.01, + "step": 1315, + "train_speed(iter/s)": 0.1921 + }, + { + "acc": 0.8704361, + "epoch": 0.43737574552683894, + "grad_norm": 5.908780003246269, + "learning_rate": 8.967889993606474e-06, + "loss": 0.66506381, + "memory(GiB)": 33.01, + "step": 1320, + "train_speed(iter/s)": 0.192103 + }, + { + "acc": 0.87443924, + "epoch": 0.43903247183565275, + "grad_norm": 3.891369282847532, + "learning_rate": 8.972608613000402e-06, + "loss": 0.6349452, + "memory(GiB)": 33.01, + "step": 1325, + "train_speed(iter/s)": 0.192105 + }, + { + "acc": 0.86603394, + "epoch": 0.44068919814446655, + "grad_norm": 4.56074550696968, + "learning_rate": 8.977309459774474e-06, + "loss": 0.71010776, + "memory(GiB)": 33.01, + "step": 1330, + "train_speed(iter/s)": 0.192108 + }, + { + "acc": 0.86591244, + "epoch": 0.4423459244532803, + "grad_norm": 3.7422516690378202, + "learning_rate": 8.981992667307172e-06, + "loss": 0.66119232, + "memory(GiB)": 33.01, + "step": 1335, + "train_speed(iter/s)": 0.19211 + }, + { + "acc": 0.86200562, + "epoch": 0.4440026507620941, + "grad_norm": 5.2778052922519745, + "learning_rate": 8.986658367481134e-06, + "loss": 0.70098228, + "memory(GiB)": 33.01, + "step": 1340, + "train_speed(iter/s)": 0.192113 + }, + { + "acc": 0.86360703, + "epoch": 0.44565937707090786, + "grad_norm": 3.5249559587809194, + "learning_rate": 8.99130669070545e-06, + "loss": 0.68683853, + "memory(GiB)": 33.01, + "step": 1345, + "train_speed(iter/s)": 0.192116 + }, + { + "acc": 0.86272469, + "epoch": 0.44731610337972166, + "grad_norm": 6.0971293097221375, + "learning_rate": 8.995937765937517e-06, + "loss": 0.67999382, + "memory(GiB)": 33.01, + "step": 1350, + "train_speed(iter/s)": 0.192121 + }, + { + "acc": 0.86906853, + "epoch": 0.44897282968853547, + "grad_norm": 4.865638580202748, + "learning_rate": 9.00055172070452e-06, + "loss": 0.67824078, + "memory(GiB)": 33.01, + "step": 1355, + "train_speed(iter/s)": 0.192126 + }, + { + "acc": 0.8755106, + "epoch": 0.4506295559973492, + "grad_norm": 2.9266592704460512, + "learning_rate": 9.00514868112449e-06, + "loss": 0.64030924, + "memory(GiB)": 33.01, + "step": 1360, + "train_speed(iter/s)": 0.192129 + }, + { + "acc": 0.86524611, + "epoch": 0.452286282306163, + "grad_norm": 5.091998354549703, + "learning_rate": 9.009728771927006e-06, + "loss": 0.71764174, + "memory(GiB)": 33.01, + "step": 1365, + "train_speed(iter/s)": 0.192134 + }, + { + "acc": 0.86362028, + "epoch": 0.45394300861497683, + "grad_norm": 5.36536430236198, + "learning_rate": 9.014292116473486e-06, + "loss": 0.72176256, + "memory(GiB)": 33.01, + "step": 1370, + "train_speed(iter/s)": 0.192138 + }, + { + "acc": 0.85922489, + "epoch": 0.4555997349237906, + "grad_norm": 5.6030024883421925, + "learning_rate": 9.018838836777126e-06, + "loss": 0.70936112, + "memory(GiB)": 33.01, + "step": 1375, + "train_speed(iter/s)": 0.192141 + }, + { + "acc": 0.86305695, + "epoch": 0.4572564612326044, + "grad_norm": 3.3750177882476686, + "learning_rate": 9.02336905352249e-06, + "loss": 0.69946198, + "memory(GiB)": 33.01, + "step": 1380, + "train_speed(iter/s)": 0.192145 + }, + { + "acc": 0.85531149, + "epoch": 0.45891318754141813, + "grad_norm": 6.3626938674314575, + "learning_rate": 9.02788288608471e-06, + "loss": 0.71012573, + "memory(GiB)": 33.01, + "step": 1385, + "train_speed(iter/s)": 0.192148 + }, + { + "acc": 0.84457617, + "epoch": 0.46056991385023194, + "grad_norm": 5.522657483538765, + "learning_rate": 9.032380452548372e-06, + "loss": 0.72450757, + "memory(GiB)": 33.01, + "step": 1390, + "train_speed(iter/s)": 0.192151 + }, + { + "acc": 0.87537985, + "epoch": 0.46222664015904574, + "grad_norm": 3.9361085118086874, + "learning_rate": 9.036861869726056e-06, + "loss": 0.6987277, + "memory(GiB)": 33.01, + "step": 1395, + "train_speed(iter/s)": 0.192155 + }, + { + "acc": 0.86897469, + "epoch": 0.4638833664678595, + "grad_norm": 5.337820215319609, + "learning_rate": 9.041327253176527e-06, + "loss": 0.6750411, + "memory(GiB)": 33.01, + "step": 1400, + "train_speed(iter/s)": 0.192159 + }, + { + "acc": 0.85538292, + "epoch": 0.4655400927766733, + "grad_norm": 3.4327882607856943, + "learning_rate": 9.045776717222626e-06, + "loss": 0.7254981, + "memory(GiB)": 33.01, + "step": 1405, + "train_speed(iter/s)": 0.192163 + }, + { + "acc": 0.87039423, + "epoch": 0.4671968190854871, + "grad_norm": 11.219916917664618, + "learning_rate": 9.050210374968827e-06, + "loss": 0.67206554, + "memory(GiB)": 33.01, + "step": 1410, + "train_speed(iter/s)": 0.192166 + }, + { + "acc": 0.85317097, + "epoch": 0.46885354539430085, + "grad_norm": 12.478943442240555, + "learning_rate": 9.054628338318491e-06, + "loss": 0.75601158, + "memory(GiB)": 33.01, + "step": 1415, + "train_speed(iter/s)": 0.19217 + }, + { + "acc": 0.87292891, + "epoch": 0.47051027170311466, + "grad_norm": 4.570693229667621, + "learning_rate": 9.05903071799081e-06, + "loss": 0.64433074, + "memory(GiB)": 33.01, + "step": 1420, + "train_speed(iter/s)": 0.192172 + }, + { + "acc": 0.86060562, + "epoch": 0.4721669980119284, + "grad_norm": 3.9476140322917415, + "learning_rate": 9.063417623537456e-06, + "loss": 0.69826007, + "memory(GiB)": 33.01, + "step": 1425, + "train_speed(iter/s)": 0.192175 + }, + { + "acc": 0.87749691, + "epoch": 0.4738237243207422, + "grad_norm": 3.679397051485356, + "learning_rate": 9.067789163358945e-06, + "loss": 0.66564126, + "memory(GiB)": 33.01, + "step": 1430, + "train_speed(iter/s)": 0.192178 + }, + { + "acc": 0.86229973, + "epoch": 0.475480450629556, + "grad_norm": 3.8901355992289397, + "learning_rate": 9.072145444720704e-06, + "loss": 0.69365253, + "memory(GiB)": 33.01, + "step": 1435, + "train_speed(iter/s)": 0.192181 + }, + { + "acc": 0.86734676, + "epoch": 0.47713717693836977, + "grad_norm": 5.701611205590434, + "learning_rate": 9.076486573768857e-06, + "loss": 0.70805588, + "memory(GiB)": 33.01, + "step": 1440, + "train_speed(iter/s)": 0.192183 + }, + { + "acc": 0.86186886, + "epoch": 0.47879390324718357, + "grad_norm": 4.134391868806285, + "learning_rate": 9.080812655545758e-06, + "loss": 0.71403632, + "memory(GiB)": 33.01, + "step": 1445, + "train_speed(iter/s)": 0.192186 + }, + { + "acc": 0.85843849, + "epoch": 0.4804506295559973, + "grad_norm": 8.870580174999937, + "learning_rate": 9.085123794005234e-06, + "loss": 0.70811243, + "memory(GiB)": 33.01, + "step": 1450, + "train_speed(iter/s)": 0.192189 + }, + { + "acc": 0.86999588, + "epoch": 0.4821073558648111, + "grad_norm": 2.8181385446303358, + "learning_rate": 9.089420092027573e-06, + "loss": 0.62644167, + "memory(GiB)": 33.01, + "step": 1455, + "train_speed(iter/s)": 0.19219 + }, + { + "acc": 0.87824745, + "epoch": 0.48376408217362493, + "grad_norm": 3.2066696614980414, + "learning_rate": 9.093701651434256e-06, + "loss": 0.62812157, + "memory(GiB)": 33.01, + "step": 1460, + "train_speed(iter/s)": 0.192193 + }, + { + "acc": 0.87342119, + "epoch": 0.4854208084824387, + "grad_norm": 2.698069952587491, + "learning_rate": 9.097968573002436e-06, + "loss": 0.61367073, + "memory(GiB)": 33.01, + "step": 1465, + "train_speed(iter/s)": 0.192195 + }, + { + "acc": 0.87189951, + "epoch": 0.4870775347912525, + "grad_norm": 3.6121057523052764, + "learning_rate": 9.102220956479176e-06, + "loss": 0.60860167, + "memory(GiB)": 33.01, + "step": 1470, + "train_speed(iter/s)": 0.192198 + }, + { + "acc": 0.86669426, + "epoch": 0.4887342611000663, + "grad_norm": 7.7696093549689005, + "learning_rate": 9.106458900595433e-06, + "loss": 0.68489089, + "memory(GiB)": 33.01, + "step": 1475, + "train_speed(iter/s)": 0.192201 + }, + { + "acc": 0.87739582, + "epoch": 0.49039098740888004, + "grad_norm": 6.282902416033516, + "learning_rate": 9.110682503079815e-06, + "loss": 0.64681358, + "memory(GiB)": 33.01, + "step": 1480, + "train_speed(iter/s)": 0.192204 + }, + { + "acc": 0.87366724, + "epoch": 0.49204771371769385, + "grad_norm": 6.148845107605522, + "learning_rate": 9.114891860672107e-06, + "loss": 0.68622417, + "memory(GiB)": 33.01, + "step": 1485, + "train_speed(iter/s)": 0.192207 + }, + { + "acc": 0.86645737, + "epoch": 0.4937044400265076, + "grad_norm": 7.102791414384401, + "learning_rate": 9.119087069136557e-06, + "loss": 0.66596956, + "memory(GiB)": 33.01, + "step": 1490, + "train_speed(iter/s)": 0.19221 + }, + { + "acc": 0.87296562, + "epoch": 0.4953611663353214, + "grad_norm": 9.74166225216245, + "learning_rate": 9.123268223274961e-06, + "loss": 0.66302705, + "memory(GiB)": 33.01, + "step": 1495, + "train_speed(iter/s)": 0.192212 + }, + { + "acc": 0.86229172, + "epoch": 0.4970178926441352, + "grad_norm": 5.848311959392031, + "learning_rate": 9.12743541693951e-06, + "loss": 0.70333743, + "memory(GiB)": 33.01, + "step": 1500, + "train_speed(iter/s)": 0.192216 + }, + { + "acc": 0.87268381, + "epoch": 0.49867461895294896, + "grad_norm": 4.150118092077552, + "learning_rate": 9.131588743045426e-06, + "loss": 0.64809484, + "memory(GiB)": 33.01, + "step": 1505, + "train_speed(iter/s)": 0.192218 + }, + { + "acc": 0.87299833, + "epoch": 0.5003313452617627, + "grad_norm": 4.538142415377428, + "learning_rate": 9.135728293583403e-06, + "loss": 0.63944387, + "memory(GiB)": 33.01, + "step": 1510, + "train_speed(iter/s)": 0.192221 + }, + { + "acc": 0.87983656, + "epoch": 0.5019880715705766, + "grad_norm": 3.08163951023743, + "learning_rate": 9.139854159631825e-06, + "loss": 0.63588171, + "memory(GiB)": 33.01, + "step": 1515, + "train_speed(iter/s)": 0.192215 + }, + { + "acc": 0.86658287, + "epoch": 0.5036447978793903, + "grad_norm": 4.263188194458355, + "learning_rate": 9.143966431368796e-06, + "loss": 0.67643175, + "memory(GiB)": 33.01, + "step": 1520, + "train_speed(iter/s)": 0.19221 + }, + { + "acc": 0.87198019, + "epoch": 0.5053015241882041, + "grad_norm": 4.218782555945772, + "learning_rate": 9.148065198083954e-06, + "loss": 0.6854351, + "memory(GiB)": 33.01, + "step": 1525, + "train_speed(iter/s)": 0.192204 + }, + { + "acc": 0.86166058, + "epoch": 0.5069582504970179, + "grad_norm": 5.475988037012612, + "learning_rate": 9.152150548190123e-06, + "loss": 0.68347588, + "memory(GiB)": 33.01, + "step": 1530, + "train_speed(iter/s)": 0.192208 + }, + { + "acc": 0.86631327, + "epoch": 0.5086149768058317, + "grad_norm": 6.588801301856675, + "learning_rate": 9.156222569234739e-06, + "loss": 0.66437025, + "memory(GiB)": 33.01, + "step": 1535, + "train_speed(iter/s)": 0.19221 + }, + { + "acc": 0.87238407, + "epoch": 0.5102717031146454, + "grad_norm": 3.9675870595193046, + "learning_rate": 9.160281347911115e-06, + "loss": 0.67067642, + "memory(GiB)": 33.01, + "step": 1540, + "train_speed(iter/s)": 0.192214 + }, + { + "acc": 0.86192398, + "epoch": 0.5119284294234593, + "grad_norm": 6.149770568367035, + "learning_rate": 9.164326970069515e-06, + "loss": 0.67884417, + "memory(GiB)": 33.01, + "step": 1545, + "train_speed(iter/s)": 0.192218 + }, + { + "acc": 0.85227327, + "epoch": 0.513585155732273, + "grad_norm": 5.331320167629407, + "learning_rate": 9.16835952072805e-06, + "loss": 0.75046368, + "memory(GiB)": 33.01, + "step": 1550, + "train_speed(iter/s)": 0.192221 + }, + { + "acc": 0.85830784, + "epoch": 0.5152418820410868, + "grad_norm": 3.7113603011577325, + "learning_rate": 9.17237908408339e-06, + "loss": 0.68575492, + "memory(GiB)": 33.01, + "step": 1555, + "train_speed(iter/s)": 0.192224 + }, + { + "acc": 0.85954647, + "epoch": 0.5168986083499006, + "grad_norm": 4.575805064916573, + "learning_rate": 9.176385743521328e-06, + "loss": 0.67908902, + "memory(GiB)": 33.01, + "step": 1560, + "train_speed(iter/s)": 0.192227 + }, + { + "acc": 0.87961903, + "epoch": 0.5185553346587144, + "grad_norm": 2.5861178107326013, + "learning_rate": 9.180379581627149e-06, + "loss": 0.62059913, + "memory(GiB)": 33.01, + "step": 1565, + "train_speed(iter/s)": 0.19223 + }, + { + "acc": 0.87046461, + "epoch": 0.5202120609675281, + "grad_norm": 5.7201873315095275, + "learning_rate": 9.184360680195843e-06, + "loss": 0.65617762, + "memory(GiB)": 33.01, + "step": 1570, + "train_speed(iter/s)": 0.192233 + }, + { + "acc": 0.87465582, + "epoch": 0.5218687872763419, + "grad_norm": 4.51594779110544, + "learning_rate": 9.18832912024216e-06, + "loss": 0.64734449, + "memory(GiB)": 33.01, + "step": 1575, + "train_speed(iter/s)": 0.192235 + }, + { + "acc": 0.87109985, + "epoch": 0.5235255135851558, + "grad_norm": 7.116594105071289, + "learning_rate": 9.1922849820105e-06, + "loss": 0.63359661, + "memory(GiB)": 33.01, + "step": 1580, + "train_speed(iter/s)": 0.192238 + }, + { + "acc": 0.87022362, + "epoch": 0.5251822398939695, + "grad_norm": 6.455544574812186, + "learning_rate": 9.196228344984651e-06, + "loss": 0.63237476, + "memory(GiB)": 33.01, + "step": 1585, + "train_speed(iter/s)": 0.192241 + }, + { + "acc": 0.86301365, + "epoch": 0.5268389662027833, + "grad_norm": 8.113697111420223, + "learning_rate": 9.200159287897373e-06, + "loss": 0.68346348, + "memory(GiB)": 33.01, + "step": 1590, + "train_speed(iter/s)": 0.192245 + }, + { + "acc": 0.87428303, + "epoch": 0.5284956925115971, + "grad_norm": 5.0813744801911644, + "learning_rate": 9.204077888739822e-06, + "loss": 0.62634401, + "memory(GiB)": 33.01, + "step": 1595, + "train_speed(iter/s)": 0.192247 + }, + { + "acc": 0.88175039, + "epoch": 0.5301524188204109, + "grad_norm": 6.862745130743622, + "learning_rate": 9.207984224770848e-06, + "loss": 0.5954772, + "memory(GiB)": 33.01, + "step": 1600, + "train_speed(iter/s)": 0.19225 + }, + { + "acc": 0.88652163, + "epoch": 0.5318091451292246, + "grad_norm": 5.095229068290511, + "learning_rate": 9.211878372526119e-06, + "loss": 0.61880541, + "memory(GiB)": 33.01, + "step": 1605, + "train_speed(iter/s)": 0.192254 + }, + { + "acc": 0.88491421, + "epoch": 0.5334658714380385, + "grad_norm": 7.683127686889883, + "learning_rate": 9.215760407827133e-06, + "loss": 0.57899289, + "memory(GiB)": 33.01, + "step": 1610, + "train_speed(iter/s)": 0.192257 + }, + { + "acc": 0.87476616, + "epoch": 0.5351225977468522, + "grad_norm": 7.282543101591841, + "learning_rate": 9.219630405790062e-06, + "loss": 0.67257605, + "memory(GiB)": 33.01, + "step": 1615, + "train_speed(iter/s)": 0.192261 + }, + { + "acc": 0.86599264, + "epoch": 0.536779324055666, + "grad_norm": 8.087444973583551, + "learning_rate": 9.223488440834489e-06, + "loss": 0.70106878, + "memory(GiB)": 33.01, + "step": 1620, + "train_speed(iter/s)": 0.192266 + }, + { + "acc": 0.86528091, + "epoch": 0.5384360503644798, + "grad_norm": 9.746503473311424, + "learning_rate": 9.227334586691982e-06, + "loss": 0.72642713, + "memory(GiB)": 33.01, + "step": 1625, + "train_speed(iter/s)": 0.192268 + }, + { + "acc": 0.86779509, + "epoch": 0.5400927766732936, + "grad_norm": 8.615700205965208, + "learning_rate": 9.231168916414552e-06, + "loss": 0.65600066, + "memory(GiB)": 33.01, + "step": 1630, + "train_speed(iter/s)": 0.19227 + }, + { + "acc": 0.86514606, + "epoch": 0.5417495029821073, + "grad_norm": 5.225316406515859, + "learning_rate": 9.234991502382982e-06, + "loss": 0.65406122, + "memory(GiB)": 33.01, + "step": 1635, + "train_speed(iter/s)": 0.192274 + }, + { + "acc": 0.86649914, + "epoch": 0.5434062292909212, + "grad_norm": 3.340031154531065, + "learning_rate": 9.238802416315024e-06, + "loss": 0.66948366, + "memory(GiB)": 33.01, + "step": 1640, + "train_speed(iter/s)": 0.192277 + }, + { + "acc": 0.87296982, + "epoch": 0.5450629555997349, + "grad_norm": 4.8852907299692445, + "learning_rate": 9.242601729273468e-06, + "loss": 0.6427763, + "memory(GiB)": 33.01, + "step": 1645, + "train_speed(iter/s)": 0.19228 + }, + { + "acc": 0.86559639, + "epoch": 0.5467196819085487, + "grad_norm": 6.885611781964723, + "learning_rate": 9.2463895116741e-06, + "loss": 0.64991856, + "memory(GiB)": 33.01, + "step": 1650, + "train_speed(iter/s)": 0.192283 + }, + { + "acc": 0.85878677, + "epoch": 0.5483764082173624, + "grad_norm": 7.638042979861447, + "learning_rate": 9.250165833293521e-06, + "loss": 0.70569301, + "memory(GiB)": 33.01, + "step": 1655, + "train_speed(iter/s)": 0.192285 + }, + { + "acc": 0.87370911, + "epoch": 0.5500331345261763, + "grad_norm": 5.079730129726356, + "learning_rate": 9.25393076327688e-06, + "loss": 0.61256323, + "memory(GiB)": 33.01, + "step": 1660, + "train_speed(iter/s)": 0.192288 + }, + { + "acc": 0.87358227, + "epoch": 0.55168986083499, + "grad_norm": 4.056124455478207, + "learning_rate": 9.257684370145448e-06, + "loss": 0.61129341, + "memory(GiB)": 33.01, + "step": 1665, + "train_speed(iter/s)": 0.192291 + }, + { + "acc": 0.86829548, + "epoch": 0.5533465871438038, + "grad_norm": 8.956941093648082, + "learning_rate": 9.261426721804117e-06, + "loss": 0.65413742, + "memory(GiB)": 33.01, + "step": 1670, + "train_speed(iter/s)": 0.192295 + }, + { + "acc": 0.87451134, + "epoch": 0.5550033134526177, + "grad_norm": 3.616833903612867, + "learning_rate": 9.265157885548758e-06, + "loss": 0.64173145, + "memory(GiB)": 33.01, + "step": 1675, + "train_speed(iter/s)": 0.192298 + }, + { + "acc": 0.88274603, + "epoch": 0.5566600397614314, + "grad_norm": 6.213948043754148, + "learning_rate": 9.2688779280735e-06, + "loss": 0.62117977, + "memory(GiB)": 33.01, + "step": 1680, + "train_speed(iter/s)": 0.192301 + }, + { + "acc": 0.87925549, + "epoch": 0.5583167660702452, + "grad_norm": 4.50004747939087, + "learning_rate": 9.272586915477856e-06, + "loss": 0.63414607, + "memory(GiB)": 33.01, + "step": 1685, + "train_speed(iter/s)": 0.192304 + }, + { + "acc": 0.87159424, + "epoch": 0.559973492379059, + "grad_norm": 7.536326528887464, + "learning_rate": 9.276284913273801e-06, + "loss": 0.65830579, + "memory(GiB)": 33.01, + "step": 1690, + "train_speed(iter/s)": 0.192307 + }, + { + "acc": 0.87907772, + "epoch": 0.5616302186878728, + "grad_norm": 3.8732555461468636, + "learning_rate": 9.27997198639269e-06, + "loss": 0.63627729, + "memory(GiB)": 33.01, + "step": 1695, + "train_speed(iter/s)": 0.19231 + }, + { + "acc": 0.8568985, + "epoch": 0.5632869449966865, + "grad_norm": 3.6926446557268657, + "learning_rate": 9.283648199192115e-06, + "loss": 0.68832617, + "memory(GiB)": 33.01, + "step": 1700, + "train_speed(iter/s)": 0.192314 + }, + { + "acc": 0.86911259, + "epoch": 0.5649436713055004, + "grad_norm": 4.167878346282884, + "learning_rate": 9.287313615462637e-06, + "loss": 0.6848526, + "memory(GiB)": 33.01, + "step": 1705, + "train_speed(iter/s)": 0.192317 + }, + { + "acc": 0.8830162, + "epoch": 0.5666003976143141, + "grad_norm": 4.031298965260666, + "learning_rate": 9.290968298434428e-06, + "loss": 0.62251148, + "memory(GiB)": 33.01, + "step": 1710, + "train_speed(iter/s)": 0.192321 + }, + { + "acc": 0.87023563, + "epoch": 0.5682571239231279, + "grad_norm": 4.574398905309359, + "learning_rate": 9.294612310783819e-06, + "loss": 0.68600483, + "memory(GiB)": 33.01, + "step": 1715, + "train_speed(iter/s)": 0.192324 + }, + { + "acc": 0.86422176, + "epoch": 0.5699138502319416, + "grad_norm": 5.657723860144203, + "learning_rate": 9.298245714639748e-06, + "loss": 0.67713032, + "memory(GiB)": 33.01, + "step": 1720, + "train_speed(iter/s)": 0.192327 + }, + { + "acc": 0.85824757, + "epoch": 0.5715705765407555, + "grad_norm": 6.949231919339707, + "learning_rate": 9.301868571590115e-06, + "loss": 0.72136383, + "memory(GiB)": 33.01, + "step": 1725, + "train_speed(iter/s)": 0.192329 + }, + { + "acc": 0.86491108, + "epoch": 0.5732273028495692, + "grad_norm": 4.511148122728733, + "learning_rate": 9.305480942688047e-06, + "loss": 0.65395832, + "memory(GiB)": 33.01, + "step": 1730, + "train_speed(iter/s)": 0.192333 + }, + { + "acc": 0.85937595, + "epoch": 0.574884029158383, + "grad_norm": 6.076865498510463, + "learning_rate": 9.309082888458076e-06, + "loss": 0.67800088, + "memory(GiB)": 33.01, + "step": 1735, + "train_speed(iter/s)": 0.192337 + }, + { + "acc": 0.85908699, + "epoch": 0.5765407554671969, + "grad_norm": 6.640583907081719, + "learning_rate": 9.312674468902207e-06, + "loss": 0.68193717, + "memory(GiB)": 33.01, + "step": 1740, + "train_speed(iter/s)": 0.19234 + }, + { + "acc": 0.86825771, + "epoch": 0.5781974817760106, + "grad_norm": 2.9913874023324505, + "learning_rate": 9.316255743505935e-06, + "loss": 0.65605841, + "memory(GiB)": 33.01, + "step": 1745, + "train_speed(iter/s)": 0.192328 + }, + { + "acc": 0.86895323, + "epoch": 0.5798542080848244, + "grad_norm": 3.509690754353086, + "learning_rate": 9.319826771244152e-06, + "loss": 0.6424942, + "memory(GiB)": 33.01, + "step": 1750, + "train_speed(iter/s)": 0.192331 + }, + { + "acc": 0.87688522, + "epoch": 0.5815109343936382, + "grad_norm": 5.585633436797879, + "learning_rate": 9.32338761058696e-06, + "loss": 0.64732981, + "memory(GiB)": 33.01, + "step": 1755, + "train_speed(iter/s)": 0.192328 + }, + { + "acc": 0.86736622, + "epoch": 0.583167660702452, + "grad_norm": 12.021249469029797, + "learning_rate": 9.326938319505438e-06, + "loss": 0.69735794, + "memory(GiB)": 33.01, + "step": 1760, + "train_speed(iter/s)": 0.192332 + }, + { + "acc": 0.85666466, + "epoch": 0.5848243870112657, + "grad_norm": 9.141066601954984, + "learning_rate": 9.330478955477281e-06, + "loss": 0.76363716, + "memory(GiB)": 33.01, + "step": 1765, + "train_speed(iter/s)": 0.192335 + }, + { + "acc": 0.86701384, + "epoch": 0.5864811133200796, + "grad_norm": 5.115855433133116, + "learning_rate": 9.334009575492406e-06, + "loss": 0.69254465, + "memory(GiB)": 33.01, + "step": 1770, + "train_speed(iter/s)": 0.192339 + }, + { + "acc": 0.87474461, + "epoch": 0.5881378396288933, + "grad_norm": 2.613935884238985, + "learning_rate": 9.337530236058433e-06, + "loss": 0.61972847, + "memory(GiB)": 33.01, + "step": 1775, + "train_speed(iter/s)": 0.192343 + }, + { + "acc": 0.8837429, + "epoch": 0.5897945659377071, + "grad_norm": 2.765768204997492, + "learning_rate": 9.341040993206135e-06, + "loss": 0.59840755, + "memory(GiB)": 33.01, + "step": 1780, + "train_speed(iter/s)": 0.192346 + }, + { + "acc": 0.87598438, + "epoch": 0.5914512922465208, + "grad_norm": 3.902385923622911, + "learning_rate": 9.344541902494764e-06, + "loss": 0.63060737, + "memory(GiB)": 33.01, + "step": 1785, + "train_speed(iter/s)": 0.192349 + }, + { + "acc": 0.87122345, + "epoch": 0.5931080185553347, + "grad_norm": 8.347631641211056, + "learning_rate": 9.348033019017355e-06, + "loss": 0.63137841, + "memory(GiB)": 33.01, + "step": 1790, + "train_speed(iter/s)": 0.192353 + }, + { + "acc": 0.88335676, + "epoch": 0.5947647448641484, + "grad_norm": 7.931165061428975, + "learning_rate": 9.351514397405894e-06, + "loss": 0.61477599, + "memory(GiB)": 33.01, + "step": 1795, + "train_speed(iter/s)": 0.192356 + }, + { + "acc": 0.88620911, + "epoch": 0.5964214711729622, + "grad_norm": 6.496412648981423, + "learning_rate": 9.35498609183648e-06, + "loss": 0.59670343, + "memory(GiB)": 33.01, + "step": 1800, + "train_speed(iter/s)": 0.19236 + }, + { + "acc": 0.87989988, + "epoch": 0.598078197481776, + "grad_norm": 5.81429505167616, + "learning_rate": 9.358448156034365e-06, + "loss": 0.58589425, + "memory(GiB)": 33.01, + "step": 1805, + "train_speed(iter/s)": 0.192364 + }, + { + "acc": 0.86021442, + "epoch": 0.5997349237905898, + "grad_norm": 7.387526808154697, + "learning_rate": 9.361900643278946e-06, + "loss": 0.72263103, + "memory(GiB)": 33.01, + "step": 1810, + "train_speed(iter/s)": 0.192367 + }, + { + "acc": 0.8685957, + "epoch": 0.6013916500994035, + "grad_norm": 3.295357400629394, + "learning_rate": 9.365343606408687e-06, + "loss": 0.6860177, + "memory(GiB)": 33.01, + "step": 1815, + "train_speed(iter/s)": 0.192372 + }, + { + "acc": 0.87111511, + "epoch": 0.6030483764082174, + "grad_norm": 3.2613474978085786, + "learning_rate": 9.368777097825971e-06, + "loss": 0.65456285, + "memory(GiB)": 33.01, + "step": 1820, + "train_speed(iter/s)": 0.192377 + }, + { + "acc": 0.86717224, + "epoch": 0.6047051027170312, + "grad_norm": 3.6842014911472183, + "learning_rate": 9.37220116950188e-06, + "loss": 0.65306549, + "memory(GiB)": 33.01, + "step": 1825, + "train_speed(iter/s)": 0.19238 + }, + { + "acc": 0.86679745, + "epoch": 0.6063618290258449, + "grad_norm": 7.862766096911152, + "learning_rate": 9.375615872980926e-06, + "loss": 0.67689962, + "memory(GiB)": 33.01, + "step": 1830, + "train_speed(iter/s)": 0.192385 + }, + { + "acc": 0.85430756, + "epoch": 0.6080185553346588, + "grad_norm": 4.942428662598694, + "learning_rate": 9.379021259385697e-06, + "loss": 0.70608482, + "memory(GiB)": 33.01, + "step": 1835, + "train_speed(iter/s)": 0.19239 + }, + { + "acc": 0.87286358, + "epoch": 0.6096752816434725, + "grad_norm": 3.6884051952135573, + "learning_rate": 9.382417379421453e-06, + "loss": 0.65542011, + "memory(GiB)": 33.01, + "step": 1840, + "train_speed(iter/s)": 0.192397 + }, + { + "acc": 0.87217827, + "epoch": 0.6113320079522863, + "grad_norm": 4.46087116014083, + "learning_rate": 9.385804283380658e-06, + "loss": 0.63779249, + "memory(GiB)": 33.01, + "step": 1845, + "train_speed(iter/s)": 0.192402 + }, + { + "acc": 0.87114782, + "epoch": 0.6129887342611001, + "grad_norm": 8.164798448258196, + "learning_rate": 9.38918202114744e-06, + "loss": 0.65047865, + "memory(GiB)": 33.01, + "step": 1850, + "train_speed(iter/s)": 0.192408 + }, + { + "acc": 0.87397671, + "epoch": 0.6146454605699139, + "grad_norm": 8.406113413807082, + "learning_rate": 9.392550642202016e-06, + "loss": 0.62601881, + "memory(GiB)": 33.01, + "step": 1855, + "train_speed(iter/s)": 0.192412 + }, + { + "acc": 0.87091293, + "epoch": 0.6163021868787276, + "grad_norm": 4.973757562448072, + "learning_rate": 9.395910195625019e-06, + "loss": 0.63574677, + "memory(GiB)": 33.01, + "step": 1860, + "train_speed(iter/s)": 0.192417 + }, + { + "acc": 0.87237234, + "epoch": 0.6179589131875414, + "grad_norm": 7.259794102040732, + "learning_rate": 9.399260730101813e-06, + "loss": 0.65975394, + "memory(GiB)": 33.01, + "step": 1865, + "train_speed(iter/s)": 0.192421 + }, + { + "acc": 0.87357645, + "epoch": 0.6196156394963552, + "grad_norm": 4.948254368813279, + "learning_rate": 9.402602293926707e-06, + "loss": 0.66142483, + "memory(GiB)": 33.01, + "step": 1870, + "train_speed(iter/s)": 0.192425 + }, + { + "acc": 0.86736622, + "epoch": 0.621272365805169, + "grad_norm": 10.71133344628175, + "learning_rate": 9.405934935007133e-06, + "loss": 0.7003459, + "memory(GiB)": 33.01, + "step": 1875, + "train_speed(iter/s)": 0.192431 + }, + { + "acc": 0.86193829, + "epoch": 0.6229290921139827, + "grad_norm": 12.594851237762438, + "learning_rate": 9.40925870086779e-06, + "loss": 0.7385016, + "memory(GiB)": 33.01, + "step": 1880, + "train_speed(iter/s)": 0.192435 + }, + { + "acc": 0.8484436, + "epoch": 0.6245858184227966, + "grad_norm": 8.874937082337999, + "learning_rate": 9.412573638654678e-06, + "loss": 0.72295904, + "memory(GiB)": 33.01, + "step": 1885, + "train_speed(iter/s)": 0.19244 + }, + { + "acc": 0.86035633, + "epoch": 0.6262425447316103, + "grad_norm": 6.769670782418024, + "learning_rate": 9.415879795139132e-06, + "loss": 0.74117517, + "memory(GiB)": 33.01, + "step": 1890, + "train_speed(iter/s)": 0.192446 + }, + { + "acc": 0.8743494, + "epoch": 0.6278992710404241, + "grad_norm": 5.426482655852995, + "learning_rate": 9.419177216721772e-06, + "loss": 0.64620824, + "memory(GiB)": 33.01, + "step": 1895, + "train_speed(iter/s)": 0.192449 + }, + { + "acc": 0.86501122, + "epoch": 0.629555997349238, + "grad_norm": 4.701645724724149, + "learning_rate": 9.42246594943642e-06, + "loss": 0.6478301, + "memory(GiB)": 33.01, + "step": 1900, + "train_speed(iter/s)": 0.192455 + }, + { + "acc": 0.86878777, + "epoch": 0.6312127236580517, + "grad_norm": 4.529349528660305, + "learning_rate": 9.425746038953943e-06, + "loss": 0.71059952, + "memory(GiB)": 33.01, + "step": 1905, + "train_speed(iter/s)": 0.19246 + }, + { + "acc": 0.86721001, + "epoch": 0.6328694499668654, + "grad_norm": 4.017732753580181, + "learning_rate": 9.42901753058607e-06, + "loss": 0.66993275, + "memory(GiB)": 33.01, + "step": 1910, + "train_speed(iter/s)": 0.192465 + }, + { + "acc": 0.85818319, + "epoch": 0.6345261762756793, + "grad_norm": 5.189942543150934, + "learning_rate": 9.432280469289143e-06, + "loss": 0.73596964, + "memory(GiB)": 33.01, + "step": 1915, + "train_speed(iter/s)": 0.19247 + }, + { + "acc": 0.8732935, + "epoch": 0.6361829025844931, + "grad_norm": 4.370475133792059, + "learning_rate": 9.43553489966782e-06, + "loss": 0.68717313, + "memory(GiB)": 33.01, + "step": 1920, + "train_speed(iter/s)": 0.192475 + }, + { + "acc": 0.85696383, + "epoch": 0.6378396288933068, + "grad_norm": 7.911266975338863, + "learning_rate": 9.43878086597874e-06, + "loss": 0.7042037, + "memory(GiB)": 33.01, + "step": 1925, + "train_speed(iter/s)": 0.19248 + }, + { + "acc": 0.86053925, + "epoch": 0.6394963552021206, + "grad_norm": 4.885586074549585, + "learning_rate": 9.442018412134133e-06, + "loss": 0.68545146, + "memory(GiB)": 33.01, + "step": 1930, + "train_speed(iter/s)": 0.192485 + }, + { + "acc": 0.87085686, + "epoch": 0.6411530815109344, + "grad_norm": 3.1038574062542796, + "learning_rate": 9.44524758170538e-06, + "loss": 0.66258116, + "memory(GiB)": 33.01, + "step": 1935, + "train_speed(iter/s)": 0.192491 + }, + { + "acc": 0.86892052, + "epoch": 0.6428098078197482, + "grad_norm": 3.7218539198228733, + "learning_rate": 9.448468417926538e-06, + "loss": 0.65869718, + "memory(GiB)": 33.01, + "step": 1940, + "train_speed(iter/s)": 0.192496 + }, + { + "acc": 0.87207308, + "epoch": 0.6444665341285619, + "grad_norm": 7.02007077139689, + "learning_rate": 9.451680963697802e-06, + "loss": 0.63378029, + "memory(GiB)": 33.01, + "step": 1945, + "train_speed(iter/s)": 0.192501 + }, + { + "acc": 0.87004795, + "epoch": 0.6461232604373758, + "grad_norm": 4.748978576741996, + "learning_rate": 9.454885261588954e-06, + "loss": 0.65019732, + "memory(GiB)": 33.01, + "step": 1950, + "train_speed(iter/s)": 0.192506 + }, + { + "acc": 0.87050247, + "epoch": 0.6477799867461895, + "grad_norm": 9.090275346449877, + "learning_rate": 9.458081353842721e-06, + "loss": 0.68803792, + "memory(GiB)": 33.01, + "step": 1955, + "train_speed(iter/s)": 0.192511 + }, + { + "acc": 0.85250912, + "epoch": 0.6494367130550033, + "grad_norm": 6.585603979797727, + "learning_rate": 9.46126928237814e-06, + "loss": 0.74050913, + "memory(GiB)": 33.01, + "step": 1960, + "train_speed(iter/s)": 0.192516 + }, + { + "acc": 0.86775932, + "epoch": 0.6510934393638171, + "grad_norm": 5.721359622127249, + "learning_rate": 9.464449088793843e-06, + "loss": 0.68673601, + "memory(GiB)": 33.01, + "step": 1965, + "train_speed(iter/s)": 0.192521 + }, + { + "acc": 0.85250912, + "epoch": 0.6527501656726309, + "grad_norm": 5.38426969316766, + "learning_rate": 9.467620814371318e-06, + "loss": 0.7278306, + "memory(GiB)": 33.01, + "step": 1970, + "train_speed(iter/s)": 0.192527 + }, + { + "acc": 0.87017155, + "epoch": 0.6544068919814446, + "grad_norm": 4.3969246956399735, + "learning_rate": 9.470784500078125e-06, + "loss": 0.69506721, + "memory(GiB)": 33.01, + "step": 1975, + "train_speed(iter/s)": 0.192516 + }, + { + "acc": 0.8760396, + "epoch": 0.6560636182902585, + "grad_norm": 2.313031591146396, + "learning_rate": 9.47394018657107e-06, + "loss": 0.63808465, + "memory(GiB)": 33.01, + "step": 1980, + "train_speed(iter/s)": 0.192516 + }, + { + "acc": 0.86906242, + "epoch": 0.6577203445990722, + "grad_norm": 3.4070078308033827, + "learning_rate": 9.477087914199344e-06, + "loss": 0.62466335, + "memory(GiB)": 33.01, + "step": 1985, + "train_speed(iter/s)": 0.192521 + }, + { + "acc": 0.87359676, + "epoch": 0.659377070907886, + "grad_norm": 5.308161945186524, + "learning_rate": 9.48022772300761e-06, + "loss": 0.67417727, + "memory(GiB)": 33.01, + "step": 1990, + "train_speed(iter/s)": 0.192528 + }, + { + "acc": 0.87883778, + "epoch": 0.6610337972166997, + "grad_norm": 5.225205276711047, + "learning_rate": 9.48335965273907e-06, + "loss": 0.63469763, + "memory(GiB)": 33.01, + "step": 1995, + "train_speed(iter/s)": 0.192533 + }, + { + "acc": 0.87891026, + "epoch": 0.6626905235255136, + "grad_norm": 5.226052280288049, + "learning_rate": 9.486483742838472e-06, + "loss": 0.63047624, + "memory(GiB)": 33.01, + "step": 2000, + "train_speed(iter/s)": 0.192539 + }, + { + "acc": 0.88026047, + "epoch": 0.6643472498343274, + "grad_norm": 8.743298332087017, + "learning_rate": 9.489600032455106e-06, + "loss": 0.61615877, + "memory(GiB)": 33.01, + "step": 2005, + "train_speed(iter/s)": 0.192545 + }, + { + "acc": 0.871735, + "epoch": 0.6660039761431411, + "grad_norm": 6.115424883254176, + "learning_rate": 9.492708560445731e-06, + "loss": 0.6272666, + "memory(GiB)": 33.01, + "step": 2010, + "train_speed(iter/s)": 0.19255 + }, + { + "acc": 0.86677151, + "epoch": 0.667660702451955, + "grad_norm": 5.472680938783484, + "learning_rate": 9.49580936537749e-06, + "loss": 0.69325242, + "memory(GiB)": 33.01, + "step": 2015, + "train_speed(iter/s)": 0.192555 + }, + { + "acc": 0.87383423, + "epoch": 0.6693174287607687, + "grad_norm": 7.415846100116536, + "learning_rate": 9.498902485530788e-06, + "loss": 0.64826474, + "memory(GiB)": 33.01, + "step": 2020, + "train_speed(iter/s)": 0.19256 + }, + { + "acc": 0.87591295, + "epoch": 0.6709741550695825, + "grad_norm": 4.961612622529448, + "learning_rate": 9.501987958902114e-06, + "loss": 0.64979839, + "memory(GiB)": 33.01, + "step": 2025, + "train_speed(iter/s)": 0.192565 + }, + { + "acc": 0.8747304, + "epoch": 0.6726308813783963, + "grad_norm": 3.7629995987160263, + "learning_rate": 9.505065823206848e-06, + "loss": 0.64468951, + "memory(GiB)": 33.01, + "step": 2030, + "train_speed(iter/s)": 0.19257 + }, + { + "acc": 0.8648468, + "epoch": 0.6742876076872101, + "grad_norm": 9.481191593906079, + "learning_rate": 9.50813611588203e-06, + "loss": 0.70373602, + "memory(GiB)": 33.01, + "step": 2035, + "train_speed(iter/s)": 0.192575 + }, + { + "acc": 0.87640219, + "epoch": 0.6759443339960238, + "grad_norm": 5.609814410215989, + "learning_rate": 9.511198874089086e-06, + "loss": 0.64934058, + "memory(GiB)": 33.01, + "step": 2040, + "train_speed(iter/s)": 0.192581 + }, + { + "acc": 0.85914211, + "epoch": 0.6776010603048377, + "grad_norm": 6.279392103025398, + "learning_rate": 9.514254134716537e-06, + "loss": 0.72303848, + "memory(GiB)": 33.01, + "step": 2045, + "train_speed(iter/s)": 0.192587 + }, + { + "acc": 0.86438618, + "epoch": 0.6792577866136514, + "grad_norm": 3.57928961851651, + "learning_rate": 9.51730193438265e-06, + "loss": 0.65385365, + "memory(GiB)": 33.01, + "step": 2050, + "train_speed(iter/s)": 0.192592 + }, + { + "acc": 0.86672287, + "epoch": 0.6809145129224652, + "grad_norm": 4.72146459698859, + "learning_rate": 9.52034230943808e-06, + "loss": 0.6649436, + "memory(GiB)": 33.01, + "step": 2055, + "train_speed(iter/s)": 0.192598 + }, + { + "acc": 0.86496525, + "epoch": 0.682571239231279, + "grad_norm": 3.9579348297855312, + "learning_rate": 9.52337529596848e-06, + "loss": 0.63084106, + "memory(GiB)": 33.01, + "step": 2060, + "train_speed(iter/s)": 0.192602 + }, + { + "acc": 0.85933104, + "epoch": 0.6842279655400928, + "grad_norm": 2.9487788456501884, + "learning_rate": 9.526400929797046e-06, + "loss": 0.69089756, + "memory(GiB)": 33.01, + "step": 2065, + "train_speed(iter/s)": 0.192609 + }, + { + "acc": 0.87197399, + "epoch": 0.6858846918489065, + "grad_norm": 6.630580178404849, + "learning_rate": 9.529419246487087e-06, + "loss": 0.68466773, + "memory(GiB)": 33.01, + "step": 2070, + "train_speed(iter/s)": 0.192614 + }, + { + "acc": 0.87497139, + "epoch": 0.6875414181577203, + "grad_norm": 5.261229659377707, + "learning_rate": 9.532430281344505e-06, + "loss": 0.65252924, + "memory(GiB)": 33.01, + "step": 2075, + "train_speed(iter/s)": 0.192619 + }, + { + "acc": 0.8781332, + "epoch": 0.6891981444665342, + "grad_norm": 5.791336126885148, + "learning_rate": 9.535434069420291e-06, + "loss": 0.64834123, + "memory(GiB)": 33.01, + "step": 2080, + "train_speed(iter/s)": 0.192624 + }, + { + "acc": 0.87972021, + "epoch": 0.6908548707753479, + "grad_norm": 6.244725426282354, + "learning_rate": 9.53843064551297e-06, + "loss": 0.60382318, + "memory(GiB)": 33.01, + "step": 2085, + "train_speed(iter/s)": 0.192629 + }, + { + "acc": 0.8692606, + "epoch": 0.6925115970841617, + "grad_norm": 3.3223224218287637, + "learning_rate": 9.54142004417101e-06, + "loss": 0.62219219, + "memory(GiB)": 33.01, + "step": 2090, + "train_speed(iter/s)": 0.192633 + }, + { + "acc": 0.86586094, + "epoch": 0.6941683233929755, + "grad_norm": 8.333006833340361, + "learning_rate": 9.54440229969522e-06, + "loss": 0.66129503, + "memory(GiB)": 33.01, + "step": 2095, + "train_speed(iter/s)": 0.192638 + }, + { + "acc": 0.87937078, + "epoch": 0.6958250497017893, + "grad_norm": 4.848522391848659, + "learning_rate": 9.547377446141123e-06, + "loss": 0.59719243, + "memory(GiB)": 33.01, + "step": 2100, + "train_speed(iter/s)": 0.192644 + }, + { + "acc": 0.87653017, + "epoch": 0.697481776010603, + "grad_norm": 6.058172300357878, + "learning_rate": 9.55034551732126e-06, + "loss": 0.6292902, + "memory(GiB)": 33.01, + "step": 2105, + "train_speed(iter/s)": 0.19265 + }, + { + "acc": 0.86440668, + "epoch": 0.6991385023194169, + "grad_norm": 5.422445091075002, + "learning_rate": 9.553306546807525e-06, + "loss": 0.65456238, + "memory(GiB)": 33.01, + "step": 2110, + "train_speed(iter/s)": 0.192656 + }, + { + "acc": 0.87416859, + "epoch": 0.7007952286282306, + "grad_norm": 5.593253876187401, + "learning_rate": 9.556260567933424e-06, + "loss": 0.63578682, + "memory(GiB)": 33.01, + "step": 2115, + "train_speed(iter/s)": 0.192661 + }, + { + "acc": 0.87171974, + "epoch": 0.7024519549370444, + "grad_norm": 5.7664175346274575, + "learning_rate": 9.559207613796336e-06, + "loss": 0.70321331, + "memory(GiB)": 33.01, + "step": 2120, + "train_speed(iter/s)": 0.192666 + }, + { + "acc": 0.8626276, + "epoch": 0.7041086812458582, + "grad_norm": 4.1724329783244345, + "learning_rate": 9.56214771725974e-06, + "loss": 0.65507898, + "memory(GiB)": 33.01, + "step": 2125, + "train_speed(iter/s)": 0.192671 + }, + { + "acc": 0.87635355, + "epoch": 0.705765407554672, + "grad_norm": 2.212910015605007, + "learning_rate": 9.565080910955405e-06, + "loss": 0.62007113, + "memory(GiB)": 33.01, + "step": 2130, + "train_speed(iter/s)": 0.192676 + }, + { + "acc": 0.8778676, + "epoch": 0.7074221338634857, + "grad_norm": 4.229729013305469, + "learning_rate": 9.568007227285568e-06, + "loss": 0.62818556, + "memory(GiB)": 33.01, + "step": 2135, + "train_speed(iter/s)": 0.192681 + }, + { + "acc": 0.86363354, + "epoch": 0.7090788601722995, + "grad_norm": 5.191226429441517, + "learning_rate": 9.570926698425084e-06, + "loss": 0.6825675, + "memory(GiB)": 33.01, + "step": 2140, + "train_speed(iter/s)": 0.192685 + }, + { + "acc": 0.87175541, + "epoch": 0.7107355864811133, + "grad_norm": 6.668956366113271, + "learning_rate": 9.573839356323542e-06, + "loss": 0.67435064, + "memory(GiB)": 33.01, + "step": 2145, + "train_speed(iter/s)": 0.192689 + }, + { + "acc": 0.86686783, + "epoch": 0.7123923127899271, + "grad_norm": 5.313814404839771, + "learning_rate": 9.576745232707372e-06, + "loss": 0.68711166, + "memory(GiB)": 33.01, + "step": 2150, + "train_speed(iter/s)": 0.192694 + }, + { + "acc": 0.8734129, + "epoch": 0.7140490390987408, + "grad_norm": 6.254802303643827, + "learning_rate": 9.57964435908191e-06, + "loss": 0.64754696, + "memory(GiB)": 33.01, + "step": 2155, + "train_speed(iter/s)": 0.192699 + }, + { + "acc": 0.86314135, + "epoch": 0.7157057654075547, + "grad_norm": 3.5194625347786626, + "learning_rate": 9.582536766733452e-06, + "loss": 0.68779392, + "memory(GiB)": 33.01, + "step": 2160, + "train_speed(iter/s)": 0.192705 + }, + { + "acc": 0.86745138, + "epoch": 0.7173624917163685, + "grad_norm": 7.667382579416203, + "learning_rate": 9.585422486731281e-06, + "loss": 0.64779177, + "memory(GiB)": 33.01, + "step": 2165, + "train_speed(iter/s)": 0.19271 + }, + { + "acc": 0.85620298, + "epoch": 0.7190192180251822, + "grad_norm": 11.09618897417527, + "learning_rate": 9.588301549929662e-06, + "loss": 0.79766526, + "memory(GiB)": 33.01, + "step": 2170, + "train_speed(iter/s)": 0.192714 + }, + { + "acc": 0.88470783, + "epoch": 0.7206759443339961, + "grad_norm": 4.853353795149172, + "learning_rate": 9.59117398696983e-06, + "loss": 0.66207132, + "memory(GiB)": 33.01, + "step": 2175, + "train_speed(iter/s)": 0.19272 + }, + { + "acc": 0.8712225, + "epoch": 0.7223326706428098, + "grad_norm": 4.423883047425217, + "learning_rate": 9.594039828281947e-06, + "loss": 0.63862867, + "memory(GiB)": 33.01, + "step": 2180, + "train_speed(iter/s)": 0.192725 + }, + { + "acc": 0.87145014, + "epoch": 0.7239893969516236, + "grad_norm": 3.296421168238937, + "learning_rate": 9.596899104087026e-06, + "loss": 0.65805697, + "memory(GiB)": 33.01, + "step": 2185, + "train_speed(iter/s)": 0.19273 + }, + { + "acc": 0.8778677, + "epoch": 0.7256461232604374, + "grad_norm": 5.366937991553024, + "learning_rate": 9.599751844398853e-06, + "loss": 0.64201961, + "memory(GiB)": 33.01, + "step": 2190, + "train_speed(iter/s)": 0.192735 + }, + { + "acc": 0.8649683, + "epoch": 0.7273028495692512, + "grad_norm": 4.97366153444764, + "learning_rate": 9.602598079025871e-06, + "loss": 0.66631365, + "memory(GiB)": 33.01, + "step": 2195, + "train_speed(iter/s)": 0.192739 + }, + { + "acc": 0.86420956, + "epoch": 0.7289595758780649, + "grad_norm": 4.878490585708368, + "learning_rate": 9.605437837573062e-06, + "loss": 0.66615968, + "memory(GiB)": 33.01, + "step": 2200, + "train_speed(iter/s)": 0.192739 + }, + { + "acc": 0.8645731, + "epoch": 0.7306163021868787, + "grad_norm": 9.691422276593839, + "learning_rate": 9.608271149443773e-06, + "loss": 0.66423512, + "memory(GiB)": 33.01, + "step": 2205, + "train_speed(iter/s)": 0.19273 + }, + { + "acc": 0.8680954, + "epoch": 0.7322730284956925, + "grad_norm": 5.243986361705653, + "learning_rate": 9.61109804384156e-06, + "loss": 0.68853683, + "memory(GiB)": 33.01, + "step": 2210, + "train_speed(iter/s)": 0.19273 + }, + { + "acc": 0.87544937, + "epoch": 0.7339297548045063, + "grad_norm": 4.484903741087824, + "learning_rate": 9.613918549771985e-06, + "loss": 0.64813089, + "memory(GiB)": 33.01, + "step": 2215, + "train_speed(iter/s)": 0.192736 + }, + { + "acc": 0.87906456, + "epoch": 0.73558648111332, + "grad_norm": 3.290790298652583, + "learning_rate": 9.616732696044411e-06, + "loss": 0.61578646, + "memory(GiB)": 33.01, + "step": 2220, + "train_speed(iter/s)": 0.19274 + }, + { + "acc": 0.8714159, + "epoch": 0.7372432074221339, + "grad_norm": 4.544670383323588, + "learning_rate": 9.619540511273759e-06, + "loss": 0.66664362, + "memory(GiB)": 33.01, + "step": 2225, + "train_speed(iter/s)": 0.192745 + }, + { + "acc": 0.87207813, + "epoch": 0.7388999337309476, + "grad_norm": 4.6854579440589275, + "learning_rate": 9.622342023882252e-06, + "loss": 0.66553741, + "memory(GiB)": 33.01, + "step": 2230, + "train_speed(iter/s)": 0.192751 + }, + { + "acc": 0.86470995, + "epoch": 0.7405566600397614, + "grad_norm": 5.073985532829242, + "learning_rate": 9.625137262101154e-06, + "loss": 0.68595324, + "memory(GiB)": 33.01, + "step": 2235, + "train_speed(iter/s)": 0.192755 + }, + { + "acc": 0.87021656, + "epoch": 0.7422133863485753, + "grad_norm": 3.4425971798138897, + "learning_rate": 9.627926253972462e-06, + "loss": 0.66654301, + "memory(GiB)": 33.01, + "step": 2240, + "train_speed(iter/s)": 0.19276 + }, + { + "acc": 0.86235189, + "epoch": 0.743870112657389, + "grad_norm": 3.8965782786196916, + "learning_rate": 9.630709027350606e-06, + "loss": 0.66385617, + "memory(GiB)": 33.01, + "step": 2245, + "train_speed(iter/s)": 0.192765 + }, + { + "acc": 0.86006021, + "epoch": 0.7455268389662028, + "grad_norm": 5.99644345544046, + "learning_rate": 9.633485609904106e-06, + "loss": 0.69552994, + "memory(GiB)": 33.01, + "step": 2250, + "train_speed(iter/s)": 0.192769 + }, + { + "acc": 0.87564688, + "epoch": 0.7471835652750166, + "grad_norm": 2.8001309984832328, + "learning_rate": 9.63625602911724e-06, + "loss": 0.63839378, + "memory(GiB)": 33.01, + "step": 2255, + "train_speed(iter/s)": 0.192773 + }, + { + "acc": 0.87822104, + "epoch": 0.7488402915838304, + "grad_norm": 4.297893306015321, + "learning_rate": 9.639020312291654e-06, + "loss": 0.61689587, + "memory(GiB)": 33.01, + "step": 2260, + "train_speed(iter/s)": 0.192779 + }, + { + "acc": 0.86783085, + "epoch": 0.7504970178926441, + "grad_norm": 26.487082811616887, + "learning_rate": 9.641778486548e-06, + "loss": 0.66026106, + "memory(GiB)": 33.01, + "step": 2265, + "train_speed(iter/s)": 0.192783 + }, + { + "acc": 0.87071695, + "epoch": 0.752153744201458, + "grad_norm": 3.2521989867655363, + "learning_rate": 9.644530578827511e-06, + "loss": 0.63276267, + "memory(GiB)": 33.01, + "step": 2270, + "train_speed(iter/s)": 0.192789 + }, + { + "acc": 0.86867542, + "epoch": 0.7538104705102717, + "grad_norm": 3.8223108119868083, + "learning_rate": 9.647276615893595e-06, + "loss": 0.68160901, + "memory(GiB)": 33.01, + "step": 2275, + "train_speed(iter/s)": 0.192793 + }, + { + "acc": 0.87834759, + "epoch": 0.7554671968190855, + "grad_norm": 3.939928542285973, + "learning_rate": 9.65001662433339e-06, + "loss": 0.62497997, + "memory(GiB)": 33.01, + "step": 2280, + "train_speed(iter/s)": 0.192798 + }, + { + "acc": 0.87994862, + "epoch": 0.7571239231278992, + "grad_norm": 3.6752016267248178, + "learning_rate": 9.652750630559314e-06, + "loss": 0.60249023, + "memory(GiB)": 33.01, + "step": 2285, + "train_speed(iter/s)": 0.192803 + }, + { + "acc": 0.86858349, + "epoch": 0.7587806494367131, + "grad_norm": 6.696075785561995, + "learning_rate": 9.655478660810578e-06, + "loss": 0.69671068, + "memory(GiB)": 33.01, + "step": 2290, + "train_speed(iter/s)": 0.192809 + }, + { + "acc": 0.86544838, + "epoch": 0.7604373757455268, + "grad_norm": 4.977378905291485, + "learning_rate": 9.65820074115472e-06, + "loss": 0.68020229, + "memory(GiB)": 33.01, + "step": 2295, + "train_speed(iter/s)": 0.192814 + }, + { + "acc": 0.87358656, + "epoch": 0.7620941020543406, + "grad_norm": 8.906192831262254, + "learning_rate": 9.660916897489078e-06, + "loss": 0.64444475, + "memory(GiB)": 33.01, + "step": 2300, + "train_speed(iter/s)": 0.192818 + }, + { + "acc": 0.87565355, + "epoch": 0.7637508283631544, + "grad_norm": 3.68508372434642, + "learning_rate": 9.663627155542284e-06, + "loss": 0.66665568, + "memory(GiB)": 33.01, + "step": 2305, + "train_speed(iter/s)": 0.192823 + }, + { + "acc": 0.87511635, + "epoch": 0.7654075546719682, + "grad_norm": 4.518392370164668, + "learning_rate": 9.666331540875712e-06, + "loss": 0.63670602, + "memory(GiB)": 33.01, + "step": 2310, + "train_speed(iter/s)": 0.192828 + }, + { + "acc": 0.87756433, + "epoch": 0.7670642809807819, + "grad_norm": 3.189881236903923, + "learning_rate": 9.66903007888494e-06, + "loss": 0.59702106, + "memory(GiB)": 33.01, + "step": 2315, + "train_speed(iter/s)": 0.192833 + }, + { + "acc": 0.87794418, + "epoch": 0.7687210072895958, + "grad_norm": 6.921943736323694, + "learning_rate": 9.67172279480117e-06, + "loss": 0.59983635, + "memory(GiB)": 33.01, + "step": 2320, + "train_speed(iter/s)": 0.192837 + }, + { + "acc": 0.87574348, + "epoch": 0.7703777335984096, + "grad_norm": 4.915893162263451, + "learning_rate": 9.674409713692644e-06, + "loss": 0.60740991, + "memory(GiB)": 33.01, + "step": 2325, + "train_speed(iter/s)": 0.192842 + }, + { + "acc": 0.87510929, + "epoch": 0.7720344599072233, + "grad_norm": 11.334691577781541, + "learning_rate": 9.677090860466051e-06, + "loss": 0.63948035, + "memory(GiB)": 33.01, + "step": 2330, + "train_speed(iter/s)": 0.192846 + }, + { + "acc": 0.8721446, + "epoch": 0.7736911862160372, + "grad_norm": 6.617023449279387, + "learning_rate": 9.679766259867908e-06, + "loss": 0.71691961, + "memory(GiB)": 33.01, + "step": 2335, + "train_speed(iter/s)": 0.19285 + }, + { + "acc": 0.87824421, + "epoch": 0.7753479125248509, + "grad_norm": 5.3101785187888195, + "learning_rate": 9.682435936485925e-06, + "loss": 0.62746267, + "memory(GiB)": 33.01, + "step": 2340, + "train_speed(iter/s)": 0.192855 + }, + { + "acc": 0.87435551, + "epoch": 0.7770046388336647, + "grad_norm": 6.644924418489874, + "learning_rate": 9.685099914750372e-06, + "loss": 0.62730703, + "memory(GiB)": 33.01, + "step": 2345, + "train_speed(iter/s)": 0.192859 + }, + { + "acc": 0.87127953, + "epoch": 0.7786613651424784, + "grad_norm": 3.5375242584252793, + "learning_rate": 9.687758218935415e-06, + "loss": 0.6645195, + "memory(GiB)": 33.01, + "step": 2350, + "train_speed(iter/s)": 0.192863 + }, + { + "acc": 0.87114382, + "epoch": 0.7803180914512923, + "grad_norm": 5.52235553077396, + "learning_rate": 9.69041087316044e-06, + "loss": 0.63387938, + "memory(GiB)": 33.01, + "step": 2355, + "train_speed(iter/s)": 0.192868 + }, + { + "acc": 0.87978954, + "epoch": 0.781974817760106, + "grad_norm": 6.25879731840057, + "learning_rate": 9.693057901391369e-06, + "loss": 0.62484179, + "memory(GiB)": 33.01, + "step": 2360, + "train_speed(iter/s)": 0.192873 + }, + { + "acc": 0.87951393, + "epoch": 0.7836315440689198, + "grad_norm": 5.480847419045661, + "learning_rate": 9.695699327441961e-06, + "loss": 0.63349662, + "memory(GiB)": 33.01, + "step": 2365, + "train_speed(iter/s)": 0.192879 + }, + { + "acc": 0.86821175, + "epoch": 0.7852882703777336, + "grad_norm": 5.457799408858417, + "learning_rate": 9.698335174975095e-06, + "loss": 0.64089489, + "memory(GiB)": 33.01, + "step": 2370, + "train_speed(iter/s)": 0.192883 + }, + { + "acc": 0.8801096, + "epoch": 0.7869449966865474, + "grad_norm": 3.4325244782448894, + "learning_rate": 9.700965467504045e-06, + "loss": 0.61814466, + "memory(GiB)": 33.01, + "step": 2375, + "train_speed(iter/s)": 0.192887 + }, + { + "acc": 0.87665138, + "epoch": 0.7886017229953611, + "grad_norm": 6.683363108167596, + "learning_rate": 9.703590228393729e-06, + "loss": 0.59398308, + "memory(GiB)": 33.01, + "step": 2380, + "train_speed(iter/s)": 0.192891 + }, + { + "acc": 0.87479162, + "epoch": 0.790258449304175, + "grad_norm": 3.3726614620556146, + "learning_rate": 9.70620948086197e-06, + "loss": 0.6071887, + "memory(GiB)": 33.01, + "step": 2385, + "train_speed(iter/s)": 0.192895 + }, + { + "acc": 0.87130003, + "epoch": 0.7919151756129887, + "grad_norm": 7.113120412038941, + "learning_rate": 9.708823247980712e-06, + "loss": 0.6394248, + "memory(GiB)": 33.01, + "step": 2390, + "train_speed(iter/s)": 0.192899 + }, + { + "acc": 0.85982437, + "epoch": 0.7935719019218025, + "grad_norm": 10.05497132850973, + "learning_rate": 9.711431552677255e-06, + "loss": 0.67967396, + "memory(GiB)": 33.01, + "step": 2395, + "train_speed(iter/s)": 0.192903 + }, + { + "acc": 0.89040947, + "epoch": 0.7952286282306164, + "grad_norm": 4.460223038735573, + "learning_rate": 9.714034417735445e-06, + "loss": 0.58384981, + "memory(GiB)": 33.01, + "step": 2400, + "train_speed(iter/s)": 0.192908 + }, + { + "acc": 0.87658396, + "epoch": 0.7968853545394301, + "grad_norm": 2.9971847928295734, + "learning_rate": 9.716631865796885e-06, + "loss": 0.59908514, + "memory(GiB)": 33.01, + "step": 2405, + "train_speed(iter/s)": 0.192911 + }, + { + "acc": 0.88406553, + "epoch": 0.7985420808482439, + "grad_norm": 5.596120844203542, + "learning_rate": 9.719223919362102e-06, + "loss": 0.59015636, + "memory(GiB)": 33.01, + "step": 2410, + "train_speed(iter/s)": 0.192916 + }, + { + "acc": 0.87028484, + "epoch": 0.8001988071570576, + "grad_norm": 7.31459559392489, + "learning_rate": 9.721810600791728e-06, + "loss": 0.66939793, + "memory(GiB)": 33.01, + "step": 2415, + "train_speed(iter/s)": 0.19292 + }, + { + "acc": 0.88224373, + "epoch": 0.8018555334658715, + "grad_norm": 5.106041137712201, + "learning_rate": 9.72439193230765e-06, + "loss": 0.58553267, + "memory(GiB)": 33.01, + "step": 2420, + "train_speed(iter/s)": 0.192924 + }, + { + "acc": 0.88108397, + "epoch": 0.8035122597746852, + "grad_norm": 3.6988250041450312, + "learning_rate": 9.726967935994161e-06, + "loss": 0.66949582, + "memory(GiB)": 33.01, + "step": 2425, + "train_speed(iter/s)": 0.192928 + }, + { + "acc": 0.86570663, + "epoch": 0.805168986083499, + "grad_norm": 9.916491784398852, + "learning_rate": 9.729538633799085e-06, + "loss": 0.66917977, + "memory(GiB)": 33.01, + "step": 2430, + "train_speed(iter/s)": 0.19292 + }, + { + "acc": 0.86554222, + "epoch": 0.8068257123923128, + "grad_norm": 9.44945007567293, + "learning_rate": 9.73210404753491e-06, + "loss": 0.68716011, + "memory(GiB)": 33.01, + "step": 2435, + "train_speed(iter/s)": 0.192915 + }, + { + "acc": 0.86351709, + "epoch": 0.8084824387011266, + "grad_norm": 6.906873336229187, + "learning_rate": 9.73466419887989e-06, + "loss": 0.7135231, + "memory(GiB)": 33.01, + "step": 2440, + "train_speed(iter/s)": 0.19292 + }, + { + "acc": 0.86703777, + "epoch": 0.8101391650099403, + "grad_norm": 7.012954588113596, + "learning_rate": 9.73721910937915e-06, + "loss": 0.68227029, + "memory(GiB)": 33.01, + "step": 2445, + "train_speed(iter/s)": 0.192924 + }, + { + "acc": 0.87811565, + "epoch": 0.8117958913187542, + "grad_norm": 4.698961796410276, + "learning_rate": 9.739768800445764e-06, + "loss": 0.63987741, + "memory(GiB)": 33.01, + "step": 2450, + "train_speed(iter/s)": 0.192928 + }, + { + "acc": 0.86939335, + "epoch": 0.8134526176275679, + "grad_norm": 4.205330590419783, + "learning_rate": 9.742313293361852e-06, + "loss": 0.621698, + "memory(GiB)": 33.01, + "step": 2455, + "train_speed(iter/s)": 0.192932 + }, + { + "acc": 0.86712561, + "epoch": 0.8151093439363817, + "grad_norm": 4.496722035371473, + "learning_rate": 9.74485260927962e-06, + "loss": 0.65557423, + "memory(GiB)": 33.01, + "step": 2460, + "train_speed(iter/s)": 0.192937 + }, + { + "acc": 0.89136581, + "epoch": 0.8167660702451955, + "grad_norm": 5.541308303048364, + "learning_rate": 9.747386769222437e-06, + "loss": 0.59083924, + "memory(GiB)": 33.01, + "step": 2465, + "train_speed(iter/s)": 0.192942 + }, + { + "acc": 0.86496525, + "epoch": 0.8184227965540093, + "grad_norm": 7.462728624822657, + "learning_rate": 9.749915794085862e-06, + "loss": 0.62141933, + "memory(GiB)": 33.01, + "step": 2470, + "train_speed(iter/s)": 0.192946 + }, + { + "acc": 0.8760519, + "epoch": 0.820079522862823, + "grad_norm": 5.589536885979733, + "learning_rate": 9.752439704638696e-06, + "loss": 0.6348043, + "memory(GiB)": 33.01, + "step": 2475, + "train_speed(iter/s)": 0.192949 + }, + { + "acc": 0.86904612, + "epoch": 0.8217362491716369, + "grad_norm": 3.553935619178476, + "learning_rate": 9.754958521523983e-06, + "loss": 0.65660267, + "memory(GiB)": 33.01, + "step": 2480, + "train_speed(iter/s)": 0.192954 + }, + { + "acc": 0.86920958, + "epoch": 0.8233929754804507, + "grad_norm": 8.160696258419575, + "learning_rate": 9.757472265260047e-06, + "loss": 0.63985438, + "memory(GiB)": 33.01, + "step": 2485, + "train_speed(iter/s)": 0.192957 + }, + { + "acc": 0.86231823, + "epoch": 0.8250497017892644, + "grad_norm": 7.278889798912907, + "learning_rate": 9.759980956241476e-06, + "loss": 0.6573843, + "memory(GiB)": 33.01, + "step": 2490, + "train_speed(iter/s)": 0.192962 + }, + { + "acc": 0.88577518, + "epoch": 0.8267064280980781, + "grad_norm": 2.5005106896064127, + "learning_rate": 9.762484614740126e-06, + "loss": 0.57792425, + "memory(GiB)": 33.01, + "step": 2495, + "train_speed(iter/s)": 0.192965 + }, + { + "acc": 0.88653049, + "epoch": 0.828363154406892, + "grad_norm": 3.5019587588066097, + "learning_rate": 9.764983260906098e-06, + "loss": 0.61271315, + "memory(GiB)": 33.01, + "step": 2500, + "train_speed(iter/s)": 0.192969 + }, + { + "acc": 0.8679534, + "epoch": 0.8300198807157058, + "grad_norm": 3.728792830280097, + "learning_rate": 9.767476914768714e-06, + "loss": 0.64312353, + "memory(GiB)": 33.01, + "step": 2505, + "train_speed(iter/s)": 0.192973 + }, + { + "acc": 0.87462015, + "epoch": 0.8316766070245195, + "grad_norm": 4.3700933875496135, + "learning_rate": 9.769965596237475e-06, + "loss": 0.60275097, + "memory(GiB)": 33.01, + "step": 2510, + "train_speed(iter/s)": 0.192976 + }, + { + "acc": 0.87128067, + "epoch": 0.8333333333333334, + "grad_norm": 7.612978714975665, + "learning_rate": 9.772449325103028e-06, + "loss": 0.65022855, + "memory(GiB)": 33.01, + "step": 2515, + "train_speed(iter/s)": 0.19298 + }, + { + "acc": 0.8678833, + "epoch": 0.8349900596421471, + "grad_norm": 7.616382306500804, + "learning_rate": 9.774928121038095e-06, + "loss": 0.64670591, + "memory(GiB)": 33.01, + "step": 2520, + "train_speed(iter/s)": 0.192983 + }, + { + "acc": 0.87310963, + "epoch": 0.8366467859509609, + "grad_norm": 3.3425387410443386, + "learning_rate": 9.777402003598412e-06, + "loss": 0.63883715, + "memory(GiB)": 33.01, + "step": 2525, + "train_speed(iter/s)": 0.192987 + }, + { + "acc": 0.8770895, + "epoch": 0.8383035122597747, + "grad_norm": 8.563408061589213, + "learning_rate": 9.779870992223668e-06, + "loss": 0.62872858, + "memory(GiB)": 33.01, + "step": 2530, + "train_speed(iter/s)": 0.192992 + }, + { + "acc": 0.87358456, + "epoch": 0.8399602385685885, + "grad_norm": 2.963533578178825, + "learning_rate": 9.782335106238396e-06, + "loss": 0.64085312, + "memory(GiB)": 33.01, + "step": 2535, + "train_speed(iter/s)": 0.192996 + }, + { + "acc": 0.87006836, + "epoch": 0.8416169648774022, + "grad_norm": 6.722505279492165, + "learning_rate": 9.784794364852908e-06, + "loss": 0.63312893, + "memory(GiB)": 33.01, + "step": 2540, + "train_speed(iter/s)": 0.193 + }, + { + "acc": 0.87112741, + "epoch": 0.8432736911862161, + "grad_norm": 3.870915323241978, + "learning_rate": 9.78724878716417e-06, + "loss": 0.61600509, + "memory(GiB)": 33.01, + "step": 2545, + "train_speed(iter/s)": 0.193003 + }, + { + "acc": 0.86981926, + "epoch": 0.8449304174950298, + "grad_norm": 7.411035478079732, + "learning_rate": 9.789698392156712e-06, + "loss": 0.62298193, + "memory(GiB)": 33.01, + "step": 2550, + "train_speed(iter/s)": 0.193007 + }, + { + "acc": 0.88824244, + "epoch": 0.8465871438038436, + "grad_norm": 5.175288938119396, + "learning_rate": 9.792143198703494e-06, + "loss": 0.57580199, + "memory(GiB)": 33.01, + "step": 2555, + "train_speed(iter/s)": 0.19301 + }, + { + "acc": 0.87506332, + "epoch": 0.8482438701126573, + "grad_norm": 3.0398547056625933, + "learning_rate": 9.794583225566784e-06, + "loss": 0.61144085, + "memory(GiB)": 33.01, + "step": 2560, + "train_speed(iter/s)": 0.193014 + }, + { + "acc": 0.88642302, + "epoch": 0.8499005964214712, + "grad_norm": 3.400815807481571, + "learning_rate": 9.797018491399024e-06, + "loss": 0.59812255, + "memory(GiB)": 33.01, + "step": 2565, + "train_speed(iter/s)": 0.193018 + }, + { + "acc": 0.89076796, + "epoch": 0.851557322730285, + "grad_norm": 5.2301657506873855, + "learning_rate": 9.799449014743683e-06, + "loss": 0.56942215, + "memory(GiB)": 33.01, + "step": 2570, + "train_speed(iter/s)": 0.193022 + }, + { + "acc": 0.87888985, + "epoch": 0.8532140490390987, + "grad_norm": 3.7446013107384593, + "learning_rate": 9.801874814036104e-06, + "loss": 0.59126511, + "memory(GiB)": 33.01, + "step": 2575, + "train_speed(iter/s)": 0.193026 + }, + { + "acc": 0.87289181, + "epoch": 0.8548707753479126, + "grad_norm": 7.364936938315251, + "learning_rate": 9.804295907604344e-06, + "loss": 0.68985062, + "memory(GiB)": 33.01, + "step": 2580, + "train_speed(iter/s)": 0.193029 + }, + { + "acc": 0.88986206, + "epoch": 0.8565275016567263, + "grad_norm": 5.180863897320667, + "learning_rate": 9.806712313670004e-06, + "loss": 0.54514599, + "memory(GiB)": 33.01, + "step": 2585, + "train_speed(iter/s)": 0.193033 + }, + { + "acc": 0.88568058, + "epoch": 0.8581842279655401, + "grad_norm": 3.9336394530292336, + "learning_rate": 9.809124050349054e-06, + "loss": 0.57606583, + "memory(GiB)": 33.01, + "step": 2590, + "train_speed(iter/s)": 0.193037 + }, + { + "acc": 0.8778533, + "epoch": 0.8598409542743539, + "grad_norm": 5.933507306693597, + "learning_rate": 9.811531135652644e-06, + "loss": 0.6583971, + "memory(GiB)": 33.01, + "step": 2595, + "train_speed(iter/s)": 0.193041 + }, + { + "acc": 0.87049732, + "epoch": 0.8614976805831677, + "grad_norm": 8.048986545194238, + "learning_rate": 9.813933587487917e-06, + "loss": 0.63537345, + "memory(GiB)": 33.01, + "step": 2600, + "train_speed(iter/s)": 0.193045 + }, + { + "acc": 0.88129292, + "epoch": 0.8631544068919814, + "grad_norm": 8.071277659969068, + "learning_rate": 9.8163314236588e-06, + "loss": 0.60130939, + "memory(GiB)": 33.01, + "step": 2605, + "train_speed(iter/s)": 0.193049 + }, + { + "acc": 0.86977844, + "epoch": 0.8648111332007953, + "grad_norm": 3.8453891682437282, + "learning_rate": 9.818724661866802e-06, + "loss": 0.60652094, + "memory(GiB)": 33.01, + "step": 2610, + "train_speed(iter/s)": 0.193052 + }, + { + "acc": 0.8910141, + "epoch": 0.866467859509609, + "grad_norm": 3.9126981491650925, + "learning_rate": 9.821113319711799e-06, + "loss": 0.57247858, + "memory(GiB)": 33.01, + "step": 2615, + "train_speed(iter/s)": 0.193056 + }, + { + "acc": 0.88858194, + "epoch": 0.8681245858184228, + "grad_norm": 4.436881308598289, + "learning_rate": 9.823497414692806e-06, + "loss": 0.54993615, + "memory(GiB)": 33.01, + "step": 2620, + "train_speed(iter/s)": 0.19306 + }, + { + "acc": 0.89008675, + "epoch": 0.8697813121272365, + "grad_norm": 7.166566696540512, + "learning_rate": 9.825876964208749e-06, + "loss": 0.61355448, + "memory(GiB)": 33.01, + "step": 2625, + "train_speed(iter/s)": 0.193063 + }, + { + "acc": 0.87891541, + "epoch": 0.8714380384360504, + "grad_norm": 4.395432702419088, + "learning_rate": 9.828251985559222e-06, + "loss": 0.61320391, + "memory(GiB)": 33.01, + "step": 2630, + "train_speed(iter/s)": 0.193067 + }, + { + "acc": 0.87204447, + "epoch": 0.8730947647448641, + "grad_norm": 3.3348825245813267, + "learning_rate": 9.830622495945252e-06, + "loss": 0.60965052, + "memory(GiB)": 33.01, + "step": 2635, + "train_speed(iter/s)": 0.19307 + }, + { + "acc": 0.88088131, + "epoch": 0.8747514910536779, + "grad_norm": 3.9816964216160633, + "learning_rate": 9.832988512470033e-06, + "loss": 0.57345366, + "memory(GiB)": 33.01, + "step": 2640, + "train_speed(iter/s)": 0.193074 + }, + { + "acc": 0.88114119, + "epoch": 0.8764082173624917, + "grad_norm": 9.249511642840648, + "learning_rate": 9.835350052139685e-06, + "loss": 0.58579946, + "memory(GiB)": 33.01, + "step": 2645, + "train_speed(iter/s)": 0.193078 + }, + { + "acc": 0.87508268, + "epoch": 0.8780649436713055, + "grad_norm": 3.0565908923917724, + "learning_rate": 9.837707131863961e-06, + "loss": 0.65310869, + "memory(GiB)": 33.01, + "step": 2650, + "train_speed(iter/s)": 0.193082 + }, + { + "acc": 0.86386747, + "epoch": 0.8797216699801192, + "grad_norm": 4.686453360827556, + "learning_rate": 9.840059768457001e-06, + "loss": 0.63752556, + "memory(GiB)": 33.01, + "step": 2655, + "train_speed(iter/s)": 0.193085 + }, + { + "acc": 0.86603851, + "epoch": 0.8813783962889331, + "grad_norm": 5.402025133939387, + "learning_rate": 9.842407978638033e-06, + "loss": 0.67756987, + "memory(GiB)": 33.01, + "step": 2660, + "train_speed(iter/s)": 0.193075 + }, + { + "acc": 0.88185043, + "epoch": 0.8830351225977469, + "grad_norm": 3.2103146092290977, + "learning_rate": 9.844751779032092e-06, + "loss": 0.61673317, + "memory(GiB)": 33.01, + "step": 2665, + "train_speed(iter/s)": 0.193075 + }, + { + "acc": 0.86560764, + "epoch": 0.8846918489065606, + "grad_norm": 8.33317075660369, + "learning_rate": 9.847091186170731e-06, + "loss": 0.68903618, + "memory(GiB)": 33.01, + "step": 2670, + "train_speed(iter/s)": 0.193079 + }, + { + "acc": 0.87715178, + "epoch": 0.8863485752153745, + "grad_norm": 4.223430736170169, + "learning_rate": 9.849426216492708e-06, + "loss": 0.63275361, + "memory(GiB)": 33.01, + "step": 2675, + "train_speed(iter/s)": 0.193083 + }, + { + "acc": 0.87970896, + "epoch": 0.8880053015241882, + "grad_norm": 2.94121098627528, + "learning_rate": 9.851756886344694e-06, + "loss": 0.57373309, + "memory(GiB)": 33.01, + "step": 2680, + "train_speed(iter/s)": 0.193086 + }, + { + "acc": 0.88578835, + "epoch": 0.889662027833002, + "grad_norm": 3.300063503820921, + "learning_rate": 9.854083211981951e-06, + "loss": 0.58510609, + "memory(GiB)": 33.01, + "step": 2685, + "train_speed(iter/s)": 0.193089 + }, + { + "acc": 0.87865191, + "epoch": 0.8913187541418157, + "grad_norm": 3.9845165580069737, + "learning_rate": 9.85640520956901e-06, + "loss": 0.57173872, + "memory(GiB)": 33.01, + "step": 2690, + "train_speed(iter/s)": 0.193092 + }, + { + "acc": 0.86326895, + "epoch": 0.8929754804506296, + "grad_norm": 6.948348088961666, + "learning_rate": 9.858722895180354e-06, + "loss": 0.68775687, + "memory(GiB)": 33.01, + "step": 2695, + "train_speed(iter/s)": 0.193096 + }, + { + "acc": 0.87145214, + "epoch": 0.8946322067594433, + "grad_norm": 8.752287203643892, + "learning_rate": 9.861036284801077e-06, + "loss": 0.61272125, + "memory(GiB)": 33.01, + "step": 2700, + "train_speed(iter/s)": 0.193099 + }, + { + "acc": 0.87385406, + "epoch": 0.8962889330682571, + "grad_norm": 6.90240422582082, + "learning_rate": 9.863345394327551e-06, + "loss": 0.66489458, + "memory(GiB)": 33.01, + "step": 2705, + "train_speed(iter/s)": 0.193103 + }, + { + "acc": 0.88107843, + "epoch": 0.8979456593770709, + "grad_norm": 2.616336324483754, + "learning_rate": 9.86565023956808e-06, + "loss": 0.59428682, + "memory(GiB)": 33.01, + "step": 2710, + "train_speed(iter/s)": 0.193106 + }, + { + "acc": 0.88717527, + "epoch": 0.8996023856858847, + "grad_norm": 5.794634490985694, + "learning_rate": 9.867950836243542e-06, + "loss": 0.57184386, + "memory(GiB)": 33.01, + "step": 2715, + "train_speed(iter/s)": 0.193109 + }, + { + "acc": 0.87975388, + "epoch": 0.9012591119946984, + "grad_norm": 5.012366580025711, + "learning_rate": 9.870247199988051e-06, + "loss": 0.58441563, + "memory(GiB)": 33.01, + "step": 2720, + "train_speed(iter/s)": 0.193113 + }, + { + "acc": 0.88116417, + "epoch": 0.9029158383035123, + "grad_norm": 2.9602394894892443, + "learning_rate": 9.87253934634957e-06, + "loss": 0.58528523, + "memory(GiB)": 33.01, + "step": 2725, + "train_speed(iter/s)": 0.193116 + }, + { + "acc": 0.874856, + "epoch": 0.904572564612326, + "grad_norm": 5.593801991819785, + "learning_rate": 9.874827290790568e-06, + "loss": 0.63316264, + "memory(GiB)": 33.01, + "step": 2730, + "train_speed(iter/s)": 0.193119 + }, + { + "acc": 0.87478542, + "epoch": 0.9062292909211398, + "grad_norm": 2.853767232052871, + "learning_rate": 9.877111048688618e-06, + "loss": 0.63812952, + "memory(GiB)": 33.01, + "step": 2735, + "train_speed(iter/s)": 0.193122 + }, + { + "acc": 0.87874794, + "epoch": 0.9078860172299537, + "grad_norm": 4.479632406548384, + "learning_rate": 9.879390635337044e-06, + "loss": 0.64914513, + "memory(GiB)": 33.01, + "step": 2740, + "train_speed(iter/s)": 0.193125 + }, + { + "acc": 0.87867851, + "epoch": 0.9095427435387674, + "grad_norm": 3.9428390634160415, + "learning_rate": 9.881666065945522e-06, + "loss": 0.60804138, + "memory(GiB)": 33.01, + "step": 2745, + "train_speed(iter/s)": 0.193129 + }, + { + "acc": 0.8855443, + "epoch": 0.9111994698475812, + "grad_norm": 4.549793534128653, + "learning_rate": 9.883937355640688e-06, + "loss": 0.59703541, + "memory(GiB)": 33.01, + "step": 2750, + "train_speed(iter/s)": 0.193132 + }, + { + "acc": 0.88780842, + "epoch": 0.912856196156395, + "grad_norm": 3.1111898746033857, + "learning_rate": 9.88620451946674e-06, + "loss": 0.59631329, + "memory(GiB)": 33.01, + "step": 2755, + "train_speed(iter/s)": 0.193135 + }, + { + "acc": 0.88322296, + "epoch": 0.9145129224652088, + "grad_norm": 4.568535516335659, + "learning_rate": 9.88846757238605e-06, + "loss": 0.6033711, + "memory(GiB)": 33.01, + "step": 2760, + "train_speed(iter/s)": 0.193139 + }, + { + "acc": 0.88480091, + "epoch": 0.9161696487740225, + "grad_norm": 3.13589447383103, + "learning_rate": 9.890726529279738e-06, + "loss": 0.58900795, + "memory(GiB)": 33.01, + "step": 2765, + "train_speed(iter/s)": 0.193141 + }, + { + "acc": 0.87481556, + "epoch": 0.9178263750828363, + "grad_norm": 5.493848696205705, + "learning_rate": 9.89298140494827e-06, + "loss": 0.6229105, + "memory(GiB)": 33.01, + "step": 2770, + "train_speed(iter/s)": 0.193145 + }, + { + "acc": 0.88898182, + "epoch": 0.9194831013916501, + "grad_norm": 5.590150231287597, + "learning_rate": 9.895232214112037e-06, + "loss": 0.56973162, + "memory(GiB)": 33.01, + "step": 2775, + "train_speed(iter/s)": 0.193148 + }, + { + "acc": 0.87322502, + "epoch": 0.9211398277004639, + "grad_norm": 3.4763799314446353, + "learning_rate": 9.897478971411934e-06, + "loss": 0.61146326, + "memory(GiB)": 33.01, + "step": 2780, + "train_speed(iter/s)": 0.193151 + }, + { + "acc": 0.88107433, + "epoch": 0.9227965540092776, + "grad_norm": 3.8196884536175557, + "learning_rate": 9.899721691409925e-06, + "loss": 0.58410368, + "memory(GiB)": 33.01, + "step": 2785, + "train_speed(iter/s)": 0.193155 + }, + { + "acc": 0.88541393, + "epoch": 0.9244532803180915, + "grad_norm": 6.503850716397312, + "learning_rate": 9.901960388589616e-06, + "loss": 0.56620717, + "memory(GiB)": 33.01, + "step": 2790, + "train_speed(iter/s)": 0.193158 + }, + { + "acc": 0.89066582, + "epoch": 0.9261100066269052, + "grad_norm": 4.407683320212889, + "learning_rate": 9.904195077356816e-06, + "loss": 0.55348973, + "memory(GiB)": 33.01, + "step": 2795, + "train_speed(iter/s)": 0.19316 + }, + { + "acc": 0.87991524, + "epoch": 0.927766732935719, + "grad_norm": 12.359804406687395, + "learning_rate": 9.906425772040086e-06, + "loss": 0.57670903, + "memory(GiB)": 33.01, + "step": 2800, + "train_speed(iter/s)": 0.193163 + }, + { + "acc": 0.87292213, + "epoch": 0.9294234592445328, + "grad_norm": 3.5608286359015193, + "learning_rate": 9.9086524868913e-06, + "loss": 0.62728844, + "memory(GiB)": 33.01, + "step": 2805, + "train_speed(iter/s)": 0.193166 + }, + { + "acc": 0.8768137, + "epoch": 0.9310801855533466, + "grad_norm": 4.492959214169583, + "learning_rate": 9.910875236086185e-06, + "loss": 0.59196949, + "memory(GiB)": 33.01, + "step": 2810, + "train_speed(iter/s)": 0.19317 + }, + { + "acc": 0.87537785, + "epoch": 0.9327369118621603, + "grad_norm": 6.076829852184719, + "learning_rate": 9.913094033724863e-06, + "loss": 0.61625524, + "memory(GiB)": 33.01, + "step": 2815, + "train_speed(iter/s)": 0.193173 + }, + { + "acc": 0.87883272, + "epoch": 0.9343936381709742, + "grad_norm": 5.17410281157418, + "learning_rate": 9.915308893832387e-06, + "loss": 0.61370792, + "memory(GiB)": 33.01, + "step": 2820, + "train_speed(iter/s)": 0.193176 + }, + { + "acc": 0.88947096, + "epoch": 0.936050364479788, + "grad_norm": 2.354419652715678, + "learning_rate": 9.917519830359278e-06, + "loss": 0.56221933, + "memory(GiB)": 33.01, + "step": 2825, + "train_speed(iter/s)": 0.19318 + }, + { + "acc": 0.8921813, + "epoch": 0.9377070907886017, + "grad_norm": 2.3352101015851763, + "learning_rate": 9.91972685718205e-06, + "loss": 0.5675477, + "memory(GiB)": 33.01, + "step": 2830, + "train_speed(iter/s)": 0.193183 + }, + { + "acc": 0.89346809, + "epoch": 0.9393638170974155, + "grad_norm": 3.8779157237954567, + "learning_rate": 9.921929988103727e-06, + "loss": 0.55961146, + "memory(GiB)": 33.01, + "step": 2835, + "train_speed(iter/s)": 0.193186 + }, + { + "acc": 0.88623056, + "epoch": 0.9410205434062293, + "grad_norm": 7.307612857800359, + "learning_rate": 9.92412923685437e-06, + "loss": 0.59642801, + "memory(GiB)": 33.01, + "step": 2840, + "train_speed(iter/s)": 0.193189 + }, + { + "acc": 0.88649006, + "epoch": 0.9426772697150431, + "grad_norm": 3.5190311837873796, + "learning_rate": 9.926324617091579e-06, + "loss": 0.5734262, + "memory(GiB)": 33.01, + "step": 2845, + "train_speed(iter/s)": 0.193191 + }, + { + "acc": 0.8924675, + "epoch": 0.9443339960238568, + "grad_norm": 8.106329865969013, + "learning_rate": 9.928516142401016e-06, + "loss": 0.56243801, + "memory(GiB)": 33.01, + "step": 2850, + "train_speed(iter/s)": 0.193195 + }, + { + "acc": 0.88808842, + "epoch": 0.9459907223326707, + "grad_norm": 3.559472198524817, + "learning_rate": 9.930703826296901e-06, + "loss": 0.53762207, + "memory(GiB)": 33.01, + "step": 2855, + "train_speed(iter/s)": 0.193198 + }, + { + "acc": 0.88387766, + "epoch": 0.9476474486414844, + "grad_norm": 3.4791848519540687, + "learning_rate": 9.932887682222506e-06, + "loss": 0.54636021, + "memory(GiB)": 33.01, + "step": 2860, + "train_speed(iter/s)": 0.193202 + }, + { + "acc": 0.88861408, + "epoch": 0.9493041749502982, + "grad_norm": 4.650097463223281, + "learning_rate": 9.935067723550667e-06, + "loss": 0.61271467, + "memory(GiB)": 33.01, + "step": 2865, + "train_speed(iter/s)": 0.193205 + }, + { + "acc": 0.89421978, + "epoch": 0.950960901259112, + "grad_norm": 3.9924528892139226, + "learning_rate": 9.937243963584262e-06, + "loss": 0.54757972, + "memory(GiB)": 33.01, + "step": 2870, + "train_speed(iter/s)": 0.193208 + }, + { + "acc": 0.89381542, + "epoch": 0.9526176275679258, + "grad_norm": 4.680966813328855, + "learning_rate": 9.939416415556704e-06, + "loss": 0.5293014, + "memory(GiB)": 33.01, + "step": 2875, + "train_speed(iter/s)": 0.193211 + }, + { + "acc": 0.89750099, + "epoch": 0.9542743538767395, + "grad_norm": 6.0306331010496805, + "learning_rate": 9.941585092632416e-06, + "loss": 0.53185587, + "memory(GiB)": 33.01, + "step": 2880, + "train_speed(iter/s)": 0.193213 + }, + { + "acc": 0.88830643, + "epoch": 0.9559310801855534, + "grad_norm": 9.350386874260305, + "learning_rate": 9.943750007907325e-06, + "loss": 0.58860159, + "memory(GiB)": 33.01, + "step": 2885, + "train_speed(iter/s)": 0.193205 + }, + { + "acc": 0.87832508, + "epoch": 0.9575878064943671, + "grad_norm": 2.981169782948862, + "learning_rate": 9.945911174409318e-06, + "loss": 0.59479094, + "memory(GiB)": 33.01, + "step": 2890, + "train_speed(iter/s)": 0.193203 + }, + { + "acc": 0.89966698, + "epoch": 0.9592445328031809, + "grad_norm": 4.547161210484024, + "learning_rate": 9.94806860509873e-06, + "loss": 0.53411198, + "memory(GiB)": 33.01, + "step": 2895, + "train_speed(iter/s)": 0.193203 + }, + { + "acc": 0.89919624, + "epoch": 0.9609012591119946, + "grad_norm": 5.715099384971192, + "learning_rate": 9.950222312868795e-06, + "loss": 0.54964809, + "memory(GiB)": 33.01, + "step": 2900, + "train_speed(iter/s)": 0.193205 + }, + { + "acc": 0.89024811, + "epoch": 0.9625579854208085, + "grad_norm": 15.890328578611772, + "learning_rate": 9.952372310546117e-06, + "loss": 0.53085089, + "memory(GiB)": 33.01, + "step": 2905, + "train_speed(iter/s)": 0.193209 + }, + { + "acc": 0.89768763, + "epoch": 0.9642147117296223, + "grad_norm": 3.423345092171575, + "learning_rate": 9.954518610891133e-06, + "loss": 0.51252246, + "memory(GiB)": 33.01, + "step": 2910, + "train_speed(iter/s)": 0.193212 + }, + { + "acc": 0.89978552, + "epoch": 0.965871438038436, + "grad_norm": 3.4753424048647212, + "learning_rate": 9.956661226598551e-06, + "loss": 0.50663137, + "memory(GiB)": 33.01, + "step": 2915, + "train_speed(iter/s)": 0.193215 + }, + { + "acc": 0.88726511, + "epoch": 0.9675281643472499, + "grad_norm": 8.177323401447397, + "learning_rate": 9.958800170297816e-06, + "loss": 0.57537966, + "memory(GiB)": 33.01, + "step": 2920, + "train_speed(iter/s)": 0.193218 + }, + { + "acc": 0.88609066, + "epoch": 0.9691848906560636, + "grad_norm": 4.191968466292702, + "learning_rate": 9.96093545455355e-06, + "loss": 0.58137269, + "memory(GiB)": 33.01, + "step": 2925, + "train_speed(iter/s)": 0.193221 + }, + { + "acc": 0.88744173, + "epoch": 0.9708416169648774, + "grad_norm": 5.809899061291322, + "learning_rate": 9.963067091865996e-06, + "loss": 0.56933103, + "memory(GiB)": 33.01, + "step": 2930, + "train_speed(iter/s)": 0.193224 + }, + { + "acc": 0.88377037, + "epoch": 0.9724983432736912, + "grad_norm": 3.0688050587103826, + "learning_rate": 9.965195094671458e-06, + "loss": 0.57009926, + "memory(GiB)": 33.01, + "step": 2935, + "train_speed(iter/s)": 0.193227 + }, + { + "acc": 0.88039722, + "epoch": 0.974155069582505, + "grad_norm": 2.7636680800360995, + "learning_rate": 9.967319475342736e-06, + "loss": 0.54072194, + "memory(GiB)": 33.01, + "step": 2940, + "train_speed(iter/s)": 0.19323 + }, + { + "acc": 0.88092518, + "epoch": 0.9758117958913187, + "grad_norm": 2.642166247104512, + "learning_rate": 9.969440246189554e-06, + "loss": 0.56847663, + "memory(GiB)": 33.01, + "step": 2945, + "train_speed(iter/s)": 0.193233 + }, + { + "acc": 0.88988876, + "epoch": 0.9774685222001326, + "grad_norm": 3.2327427727920437, + "learning_rate": 9.971557419458993e-06, + "loss": 0.53624258, + "memory(GiB)": 33.01, + "step": 2950, + "train_speed(iter/s)": 0.193236 + }, + { + "acc": 0.8884038, + "epoch": 0.9791252485089463, + "grad_norm": 5.918917242589188, + "learning_rate": 9.973671007335915e-06, + "loss": 0.53204603, + "memory(GiB)": 33.01, + "step": 2955, + "train_speed(iter/s)": 0.19324 + }, + { + "acc": 0.88612843, + "epoch": 0.9807819748177601, + "grad_norm": 3.5210439396485036, + "learning_rate": 9.975781021943376e-06, + "loss": 0.5242445, + "memory(GiB)": 33.01, + "step": 2960, + "train_speed(iter/s)": 0.193243 + }, + { + "acc": 0.88615685, + "epoch": 0.9824387011265739, + "grad_norm": 3.62728999755055, + "learning_rate": 9.977887475343056e-06, + "loss": 0.51521478, + "memory(GiB)": 33.01, + "step": 2965, + "train_speed(iter/s)": 0.193246 + }, + { + "acc": 0.89091187, + "epoch": 0.9840954274353877, + "grad_norm": 4.920774391651594, + "learning_rate": 9.979990379535667e-06, + "loss": 0.56898975, + "memory(GiB)": 33.01, + "step": 2970, + "train_speed(iter/s)": 0.193249 + }, + { + "acc": 0.89261436, + "epoch": 0.9857521537442014, + "grad_norm": 3.3201174498123835, + "learning_rate": 9.982089746461353e-06, + "loss": 0.53635111, + "memory(GiB)": 33.01, + "step": 2975, + "train_speed(iter/s)": 0.193252 + }, + { + "acc": 0.88731613, + "epoch": 0.9874088800530152, + "grad_norm": 5.747764318444423, + "learning_rate": 9.984185588000118e-06, + "loss": 0.60284166, + "memory(GiB)": 33.01, + "step": 2980, + "train_speed(iter/s)": 0.193255 + }, + { + "acc": 0.88962011, + "epoch": 0.989065606361829, + "grad_norm": 3.864722841804485, + "learning_rate": 9.986277915972207e-06, + "loss": 0.54507532, + "memory(GiB)": 33.01, + "step": 2985, + "train_speed(iter/s)": 0.193258 + }, + { + "acc": 0.89195766, + "epoch": 0.9907223326706428, + "grad_norm": 6.5018775023143265, + "learning_rate": 9.988366742138523e-06, + "loss": 0.52294307, + "memory(GiB)": 33.01, + "step": 2990, + "train_speed(iter/s)": 0.19326 + }, + { + "acc": 0.89480324, + "epoch": 0.9923790589794566, + "grad_norm": 7.012731743450955, + "learning_rate": 9.990452078201013e-06, + "loss": 0.49354048, + "memory(GiB)": 33.01, + "step": 2995, + "train_speed(iter/s)": 0.193263 + }, + { + "acc": 0.89031048, + "epoch": 0.9940357852882704, + "grad_norm": 8.816750683995743, + "learning_rate": 9.992533935803069e-06, + "loss": 0.54319358, + "memory(GiB)": 33.01, + "step": 3000, + "train_speed(iter/s)": 0.193265 + }, + { + "acc": 0.89605188, + "epoch": 0.9956925115970842, + "grad_norm": 3.7296249915122592, + "learning_rate": 9.994612326529912e-06, + "loss": 0.5190588, + "memory(GiB)": 33.01, + "step": 3005, + "train_speed(iter/s)": 0.193268 + }, + { + "acc": 0.90042381, + "epoch": 0.9973492379058979, + "grad_norm": 4.378739933514348, + "learning_rate": 9.996687261908986e-06, + "loss": 0.48994136, + "memory(GiB)": 33.01, + "step": 3010, + "train_speed(iter/s)": 0.193271 + }, + { + "acc": 0.90874329, + "epoch": 0.9990059642147118, + "grad_norm": 3.503144553319389, + "learning_rate": 9.998758753410328e-06, + "loss": 0.45982637, + "memory(GiB)": 33.01, + "step": 3015, + "train_speed(iter/s)": 0.193274 + }, + { + "epoch": 1.0, + "eval_acc": 0.9013307583879955, + "eval_loss": 0.24342016875743866, + "eval_runtime": 504.8587, + "eval_samples_per_second": 10.627, + "eval_steps_per_second": 1.329, + "step": 3018 + }, + { + "acc": 0.90102835, + "epoch": 1.0006626905235254, + "grad_norm": 2.532139645350858, + "learning_rate": 9.999999969986915e-06, + "loss": 0.49257302, + "memory(GiB)": 33.01, + "step": 3020, + "train_speed(iter/s)": 0.186613 + }, + { + "acc": 0.90085068, + "epoch": 1.0023194168323393, + "grad_norm": 4.620805847378081, + "learning_rate": 9.99999963233971e-06, + "loss": 0.47919602, + "memory(GiB)": 33.01, + "step": 3025, + "train_speed(iter/s)": 0.186623 + }, + { + "acc": 0.89500475, + "epoch": 1.0039761431411531, + "grad_norm": 7.341574928547011, + "learning_rate": 9.999998919528973e-06, + "loss": 0.51861906, + "memory(GiB)": 33.01, + "step": 3030, + "train_speed(iter/s)": 0.186631 + }, + { + "acc": 0.89477425, + "epoch": 1.0056328694499668, + "grad_norm": 5.892171722977981, + "learning_rate": 9.999997831554753e-06, + "loss": 0.55375786, + "memory(GiB)": 33.01, + "step": 3035, + "train_speed(iter/s)": 0.18664 + }, + { + "acc": 0.8929534, + "epoch": 1.0072895957587806, + "grad_norm": 6.683059805886383, + "learning_rate": 9.999996368417133e-06, + "loss": 0.52333064, + "memory(GiB)": 33.01, + "step": 3040, + "train_speed(iter/s)": 0.18665 + }, + { + "acc": 0.89874592, + "epoch": 1.0089463220675945, + "grad_norm": 3.324861288632335, + "learning_rate": 9.999994530116223e-06, + "loss": 0.51245885, + "memory(GiB)": 33.01, + "step": 3045, + "train_speed(iter/s)": 0.186659 + }, + { + "acc": 0.91399202, + "epoch": 1.0106030483764081, + "grad_norm": 2.200372721059653, + "learning_rate": 9.99999231665216e-06, + "loss": 0.45101566, + "memory(GiB)": 33.01, + "step": 3050, + "train_speed(iter/s)": 0.186668 + }, + { + "acc": 0.90618773, + "epoch": 1.012259774685222, + "grad_norm": 3.197535643791647, + "learning_rate": 9.999989728025112e-06, + "loss": 0.470046, + "memory(GiB)": 33.01, + "step": 3055, + "train_speed(iter/s)": 0.186677 + }, + { + "acc": 0.89488354, + "epoch": 1.0139165009940359, + "grad_norm": 3.6610620112383843, + "learning_rate": 9.99998676423527e-06, + "loss": 0.49543657, + "memory(GiB)": 33.01, + "step": 3060, + "train_speed(iter/s)": 0.186686 + }, + { + "acc": 0.90282478, + "epoch": 1.0155732273028495, + "grad_norm": 7.12331538371565, + "learning_rate": 9.99998342528286e-06, + "loss": 0.47873211, + "memory(GiB)": 33.01, + "step": 3065, + "train_speed(iter/s)": 0.186696 + }, + { + "acc": 0.90743256, + "epoch": 1.0172299536116634, + "grad_norm": 7.3538832979952, + "learning_rate": 9.999979711168132e-06, + "loss": 0.48369231, + "memory(GiB)": 33.01, + "step": 3070, + "train_speed(iter/s)": 0.186705 + }, + { + "acc": 0.89925756, + "epoch": 1.0188866799204772, + "grad_norm": 5.3141924394185915, + "learning_rate": 9.999975621891365e-06, + "loss": 0.51017771, + "memory(GiB)": 33.01, + "step": 3075, + "train_speed(iter/s)": 0.186715 + }, + { + "acc": 0.89403591, + "epoch": 1.0205434062292909, + "grad_norm": 2.901880423400501, + "learning_rate": 9.999971157452864e-06, + "loss": 0.50029345, + "memory(GiB)": 33.01, + "step": 3080, + "train_speed(iter/s)": 0.186724 + }, + { + "acc": 0.89361725, + "epoch": 1.0222001325381047, + "grad_norm": 2.4117637892733326, + "learning_rate": 9.999966317852965e-06, + "loss": 0.52009544, + "memory(GiB)": 33.01, + "step": 3085, + "train_speed(iter/s)": 0.186718 + }, + { + "acc": 0.89835072, + "epoch": 1.0238568588469186, + "grad_norm": 2.750198930871617, + "learning_rate": 9.999961103092028e-06, + "loss": 0.52763042, + "memory(GiB)": 33.01, + "step": 3090, + "train_speed(iter/s)": 0.186711 + }, + { + "acc": 0.89915695, + "epoch": 1.0255135851557322, + "grad_norm": 3.95803224526299, + "learning_rate": 9.99995551317045e-06, + "loss": 0.51246595, + "memory(GiB)": 33.01, + "step": 3095, + "train_speed(iter/s)": 0.186716 + }, + { + "acc": 0.90447063, + "epoch": 1.027170311464546, + "grad_norm": 2.656686168940902, + "learning_rate": 9.999949548088648e-06, + "loss": 0.47282877, + "memory(GiB)": 33.01, + "step": 3100, + "train_speed(iter/s)": 0.186724 + }, + { + "acc": 0.9053257, + "epoch": 1.02882703777336, + "grad_norm": 2.702954123523646, + "learning_rate": 9.999943207847069e-06, + "loss": 0.45020781, + "memory(GiB)": 33.01, + "step": 3105, + "train_speed(iter/s)": 0.186734 + }, + { + "acc": 0.90436687, + "epoch": 1.0304837640821736, + "grad_norm": 3.518867930567591, + "learning_rate": 9.999936492446188e-06, + "loss": 0.46951027, + "memory(GiB)": 33.01, + "step": 3110, + "train_speed(iter/s)": 0.186743 + }, + { + "acc": 0.90386429, + "epoch": 1.0321404903909874, + "grad_norm": 3.2722257223182325, + "learning_rate": 9.999929401886511e-06, + "loss": 0.46613092, + "memory(GiB)": 33.01, + "step": 3115, + "train_speed(iter/s)": 0.186752 + }, + { + "acc": 0.90762558, + "epoch": 1.0337972166998013, + "grad_norm": 3.6064831547615412, + "learning_rate": 9.999921936168569e-06, + "loss": 0.47079697, + "memory(GiB)": 33.01, + "step": 3120, + "train_speed(iter/s)": 0.186761 + }, + { + "acc": 0.90466404, + "epoch": 1.035453943008615, + "grad_norm": 24.535623163011532, + "learning_rate": 9.999914095292922e-06, + "loss": 0.48182907, + "memory(GiB)": 33.01, + "step": 3125, + "train_speed(iter/s)": 0.186771 + }, + { + "acc": 0.90340786, + "epoch": 1.0371106693174288, + "grad_norm": 2.9159017707204695, + "learning_rate": 9.999905879260158e-06, + "loss": 0.46948576, + "memory(GiB)": 33.01, + "step": 3130, + "train_speed(iter/s)": 0.18678 + }, + { + "acc": 0.8990778, + "epoch": 1.0387673956262427, + "grad_norm": 3.190435152275673, + "learning_rate": 9.999897288070895e-06, + "loss": 0.53192158, + "memory(GiB)": 33.01, + "step": 3135, + "train_speed(iter/s)": 0.186789 + }, + { + "acc": 0.8878931, + "epoch": 1.0404241219350563, + "grad_norm": 3.984589063314788, + "learning_rate": 9.999888321725778e-06, + "loss": 0.54462519, + "memory(GiB)": 33.01, + "step": 3140, + "train_speed(iter/s)": 0.186798 + }, + { + "acc": 0.9036356, + "epoch": 1.0420808482438702, + "grad_norm": 4.1234824814065005, + "learning_rate": 9.999878980225477e-06, + "loss": 0.48756065, + "memory(GiB)": 33.01, + "step": 3145, + "train_speed(iter/s)": 0.186807 + }, + { + "acc": 0.90776148, + "epoch": 1.0437375745526838, + "grad_norm": 3.184455308667677, + "learning_rate": 9.999869263570696e-06, + "loss": 0.48718882, + "memory(GiB)": 33.01, + "step": 3150, + "train_speed(iter/s)": 0.186816 + }, + { + "acc": 0.90270214, + "epoch": 1.0453943008614976, + "grad_norm": 2.879851373753325, + "learning_rate": 9.999859171762164e-06, + "loss": 0.4768774, + "memory(GiB)": 33.01, + "step": 3155, + "train_speed(iter/s)": 0.186825 + }, + { + "acc": 0.9136734, + "epoch": 1.0470510271703115, + "grad_norm": 2.804970085971501, + "learning_rate": 9.999848704800635e-06, + "loss": 0.42542081, + "memory(GiB)": 33.01, + "step": 3160, + "train_speed(iter/s)": 0.186834 + }, + { + "acc": 0.90003986, + "epoch": 1.0487077534791251, + "grad_norm": 2.2480285866426275, + "learning_rate": 9.999837862686897e-06, + "loss": 0.46268835, + "memory(GiB)": 33.01, + "step": 3165, + "train_speed(iter/s)": 0.186843 + }, + { + "acc": 0.90802698, + "epoch": 1.050364479787939, + "grad_norm": 3.252978868283233, + "learning_rate": 9.999826645421765e-06, + "loss": 0.44581494, + "memory(GiB)": 33.01, + "step": 3170, + "train_speed(iter/s)": 0.186851 + }, + { + "acc": 0.89781418, + "epoch": 1.0520212060967529, + "grad_norm": 4.894761130520978, + "learning_rate": 9.999815053006078e-06, + "loss": 0.51909595, + "memory(GiB)": 33.01, + "step": 3175, + "train_speed(iter/s)": 0.18686 + }, + { + "acc": 0.89423304, + "epoch": 1.0536779324055665, + "grad_norm": 4.716050857292568, + "learning_rate": 9.999803085440708e-06, + "loss": 0.54763927, + "memory(GiB)": 33.01, + "step": 3180, + "train_speed(iter/s)": 0.186869 + }, + { + "acc": 0.90006123, + "epoch": 1.0553346587143804, + "grad_norm": 2.617206332092734, + "learning_rate": 9.99979074272655e-06, + "loss": 0.48034215, + "memory(GiB)": 33.01, + "step": 3185, + "train_speed(iter/s)": 0.186877 + }, + { + "acc": 0.89743977, + "epoch": 1.0569913850231942, + "grad_norm": 3.2918068791076225, + "learning_rate": 9.999778024864532e-06, + "loss": 0.50724802, + "memory(GiB)": 33.01, + "step": 3190, + "train_speed(iter/s)": 0.186886 + }, + { + "acc": 0.90672321, + "epoch": 1.0586481113320079, + "grad_norm": 3.5955126458372857, + "learning_rate": 9.999764931855611e-06, + "loss": 0.47439051, + "memory(GiB)": 33.01, + "step": 3195, + "train_speed(iter/s)": 0.186895 + }, + { + "acc": 0.89568729, + "epoch": 1.0603048376408217, + "grad_norm": 3.5230454219970184, + "learning_rate": 9.999751463700765e-06, + "loss": 0.4475791, + "memory(GiB)": 33.01, + "step": 3200, + "train_speed(iter/s)": 0.186904 + }, + { + "acc": 0.91135111, + "epoch": 1.0619615639496356, + "grad_norm": 2.190094029746288, + "learning_rate": 9.999737620401006e-06, + "loss": 0.41744318, + "memory(GiB)": 33.01, + "step": 3205, + "train_speed(iter/s)": 0.186913 + }, + { + "acc": 0.90460682, + "epoch": 1.0636182902584492, + "grad_norm": 3.3341024121046825, + "learning_rate": 9.999723401957376e-06, + "loss": 0.41109085, + "memory(GiB)": 33.01, + "step": 3210, + "train_speed(iter/s)": 0.186921 + }, + { + "acc": 0.90452099, + "epoch": 1.065275016567263, + "grad_norm": 4.172841742216287, + "learning_rate": 9.999708808370937e-06, + "loss": 0.45828161, + "memory(GiB)": 33.01, + "step": 3215, + "train_speed(iter/s)": 0.186929 + }, + { + "acc": 0.911588, + "epoch": 1.066931742876077, + "grad_norm": 3.6576288539358375, + "learning_rate": 9.999693839642787e-06, + "loss": 0.45138202, + "memory(GiB)": 33.01, + "step": 3220, + "train_speed(iter/s)": 0.186937 + }, + { + "acc": 0.90843134, + "epoch": 1.0685884691848906, + "grad_norm": 3.9526372468287194, + "learning_rate": 9.99967849577405e-06, + "loss": 0.42826166, + "memory(GiB)": 33.01, + "step": 3225, + "train_speed(iter/s)": 0.186946 + }, + { + "acc": 0.90101509, + "epoch": 1.0702451954937044, + "grad_norm": 4.050874644927262, + "learning_rate": 9.999662776765875e-06, + "loss": 0.47177067, + "memory(GiB)": 33.01, + "step": 3230, + "train_speed(iter/s)": 0.186954 + }, + { + "acc": 0.89944954, + "epoch": 1.0719019218025183, + "grad_norm": 3.5071629101682724, + "learning_rate": 9.999646682619443e-06, + "loss": 0.46376143, + "memory(GiB)": 33.01, + "step": 3235, + "train_speed(iter/s)": 0.186963 + }, + { + "acc": 0.90557594, + "epoch": 1.073558648111332, + "grad_norm": 3.2694798381027628, + "learning_rate": 9.999630213335963e-06, + "loss": 0.48373728, + "memory(GiB)": 33.01, + "step": 3240, + "train_speed(iter/s)": 0.186971 + }, + { + "acc": 0.8997365, + "epoch": 1.0752153744201458, + "grad_norm": 4.088897152250184, + "learning_rate": 9.999613368916666e-06, + "loss": 0.4708004, + "memory(GiB)": 33.01, + "step": 3245, + "train_speed(iter/s)": 0.18698 + }, + { + "acc": 0.90957518, + "epoch": 1.0768721007289597, + "grad_norm": 3.5456188254242926, + "learning_rate": 9.999596149362823e-06, + "loss": 0.41807528, + "memory(GiB)": 33.01, + "step": 3250, + "train_speed(iter/s)": 0.186988 + }, + { + "acc": 0.90725698, + "epoch": 1.0785288270377733, + "grad_norm": 3.2934084922845868, + "learning_rate": 9.99957855467572e-06, + "loss": 0.41615124, + "memory(GiB)": 33.01, + "step": 3255, + "train_speed(iter/s)": 0.186996 + }, + { + "acc": 0.91363049, + "epoch": 1.0801855533465872, + "grad_norm": 2.9929364538347074, + "learning_rate": 9.99956058485668e-06, + "loss": 0.39844606, + "memory(GiB)": 33.01, + "step": 3260, + "train_speed(iter/s)": 0.187005 + }, + { + "acc": 0.92446899, + "epoch": 1.081842279655401, + "grad_norm": 3.0013491142983875, + "learning_rate": 9.999542239907052e-06, + "loss": 0.38574693, + "memory(GiB)": 33.01, + "step": 3265, + "train_speed(iter/s)": 0.187013 + }, + { + "acc": 0.91798916, + "epoch": 1.0834990059642147, + "grad_norm": 2.9253649675762707, + "learning_rate": 9.999523519828212e-06, + "loss": 0.38218203, + "memory(GiB)": 33.01, + "step": 3270, + "train_speed(iter/s)": 0.187022 + }, + { + "acc": 0.91032162, + "epoch": 1.0851557322730285, + "grad_norm": 3.4505921508829287, + "learning_rate": 9.999504424621564e-06, + "loss": 0.40374975, + "memory(GiB)": 33.01, + "step": 3275, + "train_speed(iter/s)": 0.187029 + }, + { + "acc": 0.90089769, + "epoch": 1.0868124585818424, + "grad_norm": 3.6954550013229697, + "learning_rate": 9.999484954288543e-06, + "loss": 0.47235622, + "memory(GiB)": 33.01, + "step": 3280, + "train_speed(iter/s)": 0.187037 + }, + { + "acc": 0.90761738, + "epoch": 1.088469184890656, + "grad_norm": 3.5433394473860798, + "learning_rate": 9.999465108830605e-06, + "loss": 0.41755667, + "memory(GiB)": 33.01, + "step": 3285, + "train_speed(iter/s)": 0.187046 + }, + { + "acc": 0.91872139, + "epoch": 1.0901259111994699, + "grad_norm": 3.9994350312871383, + "learning_rate": 9.999444888249247e-06, + "loss": 0.43578491, + "memory(GiB)": 33.01, + "step": 3290, + "train_speed(iter/s)": 0.187054 + }, + { + "acc": 0.92029285, + "epoch": 1.0917826375082837, + "grad_norm": 2.8535836540855994, + "learning_rate": 9.999424292545981e-06, + "loss": 0.40754728, + "memory(GiB)": 33.01, + "step": 3295, + "train_speed(iter/s)": 0.187063 + }, + { + "acc": 0.91320772, + "epoch": 1.0934393638170974, + "grad_norm": 3.159190628284286, + "learning_rate": 9.999403321722352e-06, + "loss": 0.44989252, + "memory(GiB)": 33.01, + "step": 3300, + "train_speed(iter/s)": 0.187072 + }, + { + "acc": 0.90615292, + "epoch": 1.0950960901259112, + "grad_norm": 2.6560869687241637, + "learning_rate": 9.999381975779937e-06, + "loss": 0.41778717, + "memory(GiB)": 33.01, + "step": 3305, + "train_speed(iter/s)": 0.18708 + }, + { + "acc": 0.91706696, + "epoch": 1.0967528164347249, + "grad_norm": 3.3806554149021606, + "learning_rate": 9.999360254720336e-06, + "loss": 0.40516958, + "memory(GiB)": 33.01, + "step": 3310, + "train_speed(iter/s)": 0.187082 + }, + { + "acc": 0.91830273, + "epoch": 1.0984095427435387, + "grad_norm": 3.019253191194386, + "learning_rate": 9.999338158545177e-06, + "loss": 0.39181962, + "memory(GiB)": 33.01, + "step": 3315, + "train_speed(iter/s)": 0.187083 + }, + { + "acc": 0.91280575, + "epoch": 1.1000662690523526, + "grad_norm": 3.086005010991861, + "learning_rate": 9.999315687256122e-06, + "loss": 0.41425366, + "memory(GiB)": 33.01, + "step": 3320, + "train_speed(iter/s)": 0.187085 + }, + { + "acc": 0.91823215, + "epoch": 1.1017229953611662, + "grad_norm": 2.5142824427576036, + "learning_rate": 9.999292840854854e-06, + "loss": 0.36637301, + "memory(GiB)": 33.01, + "step": 3325, + "train_speed(iter/s)": 0.187089 + }, + { + "acc": 0.91018171, + "epoch": 1.10337972166998, + "grad_norm": 4.8237007090091915, + "learning_rate": 9.99926961934309e-06, + "loss": 0.43959985, + "memory(GiB)": 33.01, + "step": 3330, + "train_speed(iter/s)": 0.187098 + }, + { + "acc": 0.91465988, + "epoch": 1.105036447978794, + "grad_norm": 5.9902259481845554, + "learning_rate": 9.999246022722572e-06, + "loss": 0.42311144, + "memory(GiB)": 33.01, + "step": 3335, + "train_speed(iter/s)": 0.187107 + }, + { + "acc": 0.90747385, + "epoch": 1.1066931742876076, + "grad_norm": 3.742504529402083, + "learning_rate": 9.999222050995069e-06, + "loss": 0.41870995, + "memory(GiB)": 33.01, + "step": 3340, + "train_speed(iter/s)": 0.187115 + }, + { + "acc": 0.9087347, + "epoch": 1.1083499005964215, + "grad_norm": 3.081075463249609, + "learning_rate": 9.999197704162381e-06, + "loss": 0.43444052, + "memory(GiB)": 33.01, + "step": 3345, + "train_speed(iter/s)": 0.187123 + }, + { + "acc": 0.9046854, + "epoch": 1.1100066269052353, + "grad_norm": 4.796168628118214, + "learning_rate": 9.999172982226335e-06, + "loss": 0.47512655, + "memory(GiB)": 33.01, + "step": 3350, + "train_speed(iter/s)": 0.187132 + }, + { + "acc": 0.90798407, + "epoch": 1.111663353214049, + "grad_norm": 2.98283344383208, + "learning_rate": 9.999147885188786e-06, + "loss": 0.44967909, + "memory(GiB)": 33.01, + "step": 3355, + "train_speed(iter/s)": 0.18714 + }, + { + "acc": 0.9132597, + "epoch": 1.1133200795228628, + "grad_norm": 5.148743019636876, + "learning_rate": 9.999122413051616e-06, + "loss": 0.41550369, + "memory(GiB)": 33.01, + "step": 3360, + "train_speed(iter/s)": 0.187148 + }, + { + "acc": 0.91174011, + "epoch": 1.1149768058316767, + "grad_norm": 4.429328627712513, + "learning_rate": 9.999096565816741e-06, + "loss": 0.40560884, + "memory(GiB)": 33.01, + "step": 3365, + "train_speed(iter/s)": 0.187156 + }, + { + "acc": 0.9127593, + "epoch": 1.1166335321404903, + "grad_norm": 2.7428037575839994, + "learning_rate": 9.999070343486095e-06, + "loss": 0.38106241, + "memory(GiB)": 33.01, + "step": 3370, + "train_speed(iter/s)": 0.187164 + }, + { + "acc": 0.91239586, + "epoch": 1.1182902584493042, + "grad_norm": 3.6801501583797127, + "learning_rate": 9.999043746061649e-06, + "loss": 0.39418139, + "memory(GiB)": 33.01, + "step": 3375, + "train_speed(iter/s)": 0.187172 + }, + { + "acc": 0.91507854, + "epoch": 1.119946984758118, + "grad_norm": 3.6826512051892926, + "learning_rate": 9.999016773545396e-06, + "loss": 0.40172067, + "memory(GiB)": 33.01, + "step": 3380, + "train_speed(iter/s)": 0.18718 + }, + { + "acc": 0.91697407, + "epoch": 1.1216037110669317, + "grad_norm": 3.7316676664290243, + "learning_rate": 9.998989425939364e-06, + "loss": 0.38764234, + "memory(GiB)": 33.01, + "step": 3385, + "train_speed(iter/s)": 0.187188 + }, + { + "acc": 0.90916672, + "epoch": 1.1232604373757455, + "grad_norm": 3.118760160636055, + "learning_rate": 9.998961703245603e-06, + "loss": 0.40729375, + "memory(GiB)": 33.01, + "step": 3390, + "train_speed(iter/s)": 0.187196 + }, + { + "acc": 0.91550961, + "epoch": 1.1249171636845594, + "grad_norm": 3.402058127498593, + "learning_rate": 9.998933605466193e-06, + "loss": 0.36972272, + "memory(GiB)": 33.01, + "step": 3395, + "train_speed(iter/s)": 0.187204 + }, + { + "acc": 0.92313728, + "epoch": 1.126573889993373, + "grad_norm": 3.2158022961513244, + "learning_rate": 9.998905132603244e-06, + "loss": 0.40722723, + "memory(GiB)": 33.01, + "step": 3400, + "train_speed(iter/s)": 0.187212 + }, + { + "acc": 0.91641541, + "epoch": 1.128230616302187, + "grad_norm": 3.4663386627981265, + "learning_rate": 9.99887628465889e-06, + "loss": 0.36683979, + "memory(GiB)": 33.01, + "step": 3405, + "train_speed(iter/s)": 0.18722 + }, + { + "acc": 0.90837517, + "epoch": 1.1298873426110005, + "grad_norm": 3.406324627196776, + "learning_rate": 9.998847061635298e-06, + "loss": 0.45863819, + "memory(GiB)": 33.01, + "step": 3410, + "train_speed(iter/s)": 0.187229 + }, + { + "acc": 0.90772152, + "epoch": 1.1315440689198144, + "grad_norm": 3.5219354346318, + "learning_rate": 9.998817463534658e-06, + "loss": 0.43631525, + "memory(GiB)": 33.01, + "step": 3415, + "train_speed(iter/s)": 0.187237 + }, + { + "acc": 0.90815153, + "epoch": 1.1332007952286283, + "grad_norm": 1.8801903029118625, + "learning_rate": 9.998787490359197e-06, + "loss": 0.39505372, + "memory(GiB)": 33.01, + "step": 3420, + "train_speed(iter/s)": 0.187245 + }, + { + "acc": 0.92017059, + "epoch": 1.134857521537442, + "grad_norm": 3.439583077856211, + "learning_rate": 9.998757142111157e-06, + "loss": 0.36194487, + "memory(GiB)": 33.01, + "step": 3425, + "train_speed(iter/s)": 0.187253 + }, + { + "acc": 0.91789627, + "epoch": 1.1365142478462558, + "grad_norm": 3.3683622265052215, + "learning_rate": 9.99872641879282e-06, + "loss": 0.37170382, + "memory(GiB)": 33.01, + "step": 3430, + "train_speed(iter/s)": 0.18726 + }, + { + "acc": 0.91388178, + "epoch": 1.1381709741550696, + "grad_norm": 3.317847183719663, + "learning_rate": 9.998695320406492e-06, + "loss": 0.40776911, + "memory(GiB)": 33.01, + "step": 3435, + "train_speed(iter/s)": 0.187268 + }, + { + "acc": 0.90797386, + "epoch": 1.1398277004638833, + "grad_norm": 2.6200821673604957, + "learning_rate": 9.998663846954502e-06, + "loss": 0.39878166, + "memory(GiB)": 33.01, + "step": 3440, + "train_speed(iter/s)": 0.187276 + }, + { + "acc": 0.91855488, + "epoch": 1.1414844267726971, + "grad_norm": 2.8307763205024603, + "learning_rate": 9.998631998439218e-06, + "loss": 0.40054965, + "memory(GiB)": 33.01, + "step": 3445, + "train_speed(iter/s)": 0.187284 + }, + { + "acc": 0.91904821, + "epoch": 1.143141153081511, + "grad_norm": 3.696799291377706, + "learning_rate": 9.998599774863023e-06, + "loss": 0.38020282, + "memory(GiB)": 33.01, + "step": 3450, + "train_speed(iter/s)": 0.187291 + }, + { + "acc": 0.9194087, + "epoch": 1.1447978793903246, + "grad_norm": 3.2008164309337364, + "learning_rate": 9.998567176228341e-06, + "loss": 0.38544326, + "memory(GiB)": 33.01, + "step": 3455, + "train_speed(iter/s)": 0.187299 + }, + { + "acc": 0.91780272, + "epoch": 1.1464546056991385, + "grad_norm": 2.840912562079474, + "learning_rate": 9.998534202537615e-06, + "loss": 0.37580428, + "memory(GiB)": 33.01, + "step": 3460, + "train_speed(iter/s)": 0.187307 + }, + { + "acc": 0.91689548, + "epoch": 1.1481113320079523, + "grad_norm": 3.851478472278105, + "learning_rate": 9.99850085379332e-06, + "loss": 0.40554276, + "memory(GiB)": 33.01, + "step": 3465, + "train_speed(iter/s)": 0.187314 + }, + { + "acc": 0.91284361, + "epoch": 1.149768058316766, + "grad_norm": 3.2849355891803858, + "learning_rate": 9.99846712999796e-06, + "loss": 0.38951898, + "memory(GiB)": 33.01, + "step": 3470, + "train_speed(iter/s)": 0.187322 + }, + { + "acc": 0.90839863, + "epoch": 1.1514247846255798, + "grad_norm": 3.633392809645267, + "learning_rate": 9.998433031154064e-06, + "loss": 0.39964263, + "memory(GiB)": 33.01, + "step": 3475, + "train_speed(iter/s)": 0.187329 + }, + { + "acc": 0.92207413, + "epoch": 1.1530815109343937, + "grad_norm": 2.5154870578677477, + "learning_rate": 9.998398557264191e-06, + "loss": 0.37701032, + "memory(GiB)": 33.01, + "step": 3480, + "train_speed(iter/s)": 0.187337 + }, + { + "acc": 0.91332684, + "epoch": 1.1547382372432073, + "grad_norm": 3.25480052804041, + "learning_rate": 9.998363708330927e-06, + "loss": 0.40475931, + "memory(GiB)": 33.01, + "step": 3485, + "train_speed(iter/s)": 0.187344 + }, + { + "acc": 0.9196701, + "epoch": 1.1563949635520212, + "grad_norm": 2.3516656649413874, + "learning_rate": 9.998328484356889e-06, + "loss": 0.39389837, + "memory(GiB)": 33.01, + "step": 3490, + "train_speed(iter/s)": 0.187352 + }, + { + "acc": 0.92585783, + "epoch": 1.158051689860835, + "grad_norm": 2.8555420991231295, + "learning_rate": 9.998292885344719e-06, + "loss": 0.37741697, + "memory(GiB)": 33.01, + "step": 3495, + "train_speed(iter/s)": 0.18736 + }, + { + "acc": 0.91590786, + "epoch": 1.1597084161696487, + "grad_norm": 2.5764556931798053, + "learning_rate": 9.99825691129709e-06, + "loss": 0.36057463, + "memory(GiB)": 33.01, + "step": 3500, + "train_speed(iter/s)": 0.187368 + }, + { + "acc": 0.92557907, + "epoch": 1.1613651424784626, + "grad_norm": 2.5339370927921654, + "learning_rate": 9.998220562216697e-06, + "loss": 0.37259173, + "memory(GiB)": 33.01, + "step": 3505, + "train_speed(iter/s)": 0.187376 + }, + { + "acc": 0.92664833, + "epoch": 1.1630218687872764, + "grad_norm": 2.273777795908151, + "learning_rate": 9.998183838106274e-06, + "loss": 0.33274937, + "memory(GiB)": 33.01, + "step": 3510, + "train_speed(iter/s)": 0.187383 + }, + { + "acc": 0.92987843, + "epoch": 1.16467859509609, + "grad_norm": 4.244783691017268, + "learning_rate": 9.998146738968571e-06, + "loss": 0.34606485, + "memory(GiB)": 33.01, + "step": 3515, + "train_speed(iter/s)": 0.187391 + }, + { + "acc": 0.92926168, + "epoch": 1.166335321404904, + "grad_norm": 2.886560446164786, + "learning_rate": 9.998109264806377e-06, + "loss": 0.36356709, + "memory(GiB)": 33.01, + "step": 3520, + "train_speed(iter/s)": 0.187398 + }, + { + "acc": 0.92352371, + "epoch": 1.1679920477137178, + "grad_norm": 3.5886682050868384, + "learning_rate": 9.9980714156225e-06, + "loss": 0.36706614, + "memory(GiB)": 33.01, + "step": 3525, + "train_speed(iter/s)": 0.187406 + }, + { + "acc": 0.92344646, + "epoch": 1.1696487740225314, + "grad_norm": 3.1139911670284848, + "learning_rate": 9.998033191419783e-06, + "loss": 0.33353124, + "memory(GiB)": 33.01, + "step": 3530, + "train_speed(iter/s)": 0.187414 + }, + { + "acc": 0.92489176, + "epoch": 1.1713055003313453, + "grad_norm": 3.2696372919157977, + "learning_rate": 9.997994592201091e-06, + "loss": 0.36244659, + "memory(GiB)": 33.01, + "step": 3535, + "train_speed(iter/s)": 0.187422 + }, + { + "acc": 0.92667379, + "epoch": 1.1729622266401591, + "grad_norm": 2.840594409063846, + "learning_rate": 9.997955617969324e-06, + "loss": 0.33494053, + "memory(GiB)": 33.01, + "step": 3540, + "train_speed(iter/s)": 0.187422 + }, + { + "acc": 0.93094053, + "epoch": 1.1746189529489728, + "grad_norm": 2.6007626989163803, + "learning_rate": 9.997916268727406e-06, + "loss": 0.32520399, + "memory(GiB)": 33.01, + "step": 3545, + "train_speed(iter/s)": 0.187424 + }, + { + "acc": 0.92882347, + "epoch": 1.1762756792577866, + "grad_norm": 2.1525868787398847, + "learning_rate": 9.997876544478288e-06, + "loss": 0.34395452, + "memory(GiB)": 33.01, + "step": 3550, + "train_speed(iter/s)": 0.187422 + }, + { + "acc": 0.91319542, + "epoch": 1.1779324055666005, + "grad_norm": 4.288583356910695, + "learning_rate": 9.997836445224954e-06, + "loss": 0.37349844, + "memory(GiB)": 33.01, + "step": 3555, + "train_speed(iter/s)": 0.18743 + }, + { + "acc": 0.90995693, + "epoch": 1.1795891318754141, + "grad_norm": 2.533392777088841, + "learning_rate": 9.997795970970409e-06, + "loss": 0.39516404, + "memory(GiB)": 33.01, + "step": 3560, + "train_speed(iter/s)": 0.187438 + }, + { + "acc": 0.91162281, + "epoch": 1.181245858184228, + "grad_norm": 3.4627809496510094, + "learning_rate": 9.997755121717694e-06, + "loss": 0.43464499, + "memory(GiB)": 33.01, + "step": 3565, + "train_speed(iter/s)": 0.187446 + }, + { + "acc": 0.91588955, + "epoch": 1.1829025844930419, + "grad_norm": 4.111033674930325, + "learning_rate": 9.997713897469871e-06, + "loss": 0.38906169, + "memory(GiB)": 33.01, + "step": 3570, + "train_speed(iter/s)": 0.187453 + }, + { + "acc": 0.91391239, + "epoch": 1.1845593108018555, + "grad_norm": 3.1895353282084073, + "learning_rate": 9.997672298230037e-06, + "loss": 0.39768865, + "memory(GiB)": 33.01, + "step": 3575, + "train_speed(iter/s)": 0.187461 + }, + { + "acc": 0.91793308, + "epoch": 1.1862160371106694, + "grad_norm": 2.7817667892630813, + "learning_rate": 9.99763032400131e-06, + "loss": 0.39883778, + "memory(GiB)": 33.01, + "step": 3580, + "train_speed(iter/s)": 0.187469 + }, + { + "acc": 0.92342186, + "epoch": 1.1878727634194832, + "grad_norm": 2.128217928171914, + "learning_rate": 9.99758797478684e-06, + "loss": 0.3766788, + "memory(GiB)": 33.01, + "step": 3585, + "train_speed(iter/s)": 0.187476 + }, + { + "acc": 0.9227787, + "epoch": 1.1895294897282969, + "grad_norm": 2.417063985621045, + "learning_rate": 9.99754525058981e-06, + "loss": 0.33557951, + "memory(GiB)": 33.01, + "step": 3590, + "train_speed(iter/s)": 0.187484 + }, + { + "acc": 0.92011023, + "epoch": 1.1911862160371107, + "grad_norm": 2.6827761651541944, + "learning_rate": 9.99750215141342e-06, + "loss": 0.35906065, + "memory(GiB)": 33.01, + "step": 3595, + "train_speed(iter/s)": 0.187491 + }, + { + "acc": 0.92309437, + "epoch": 1.1928429423459244, + "grad_norm": 3.2842965392742225, + "learning_rate": 9.997458677260906e-06, + "loss": 0.34788375, + "memory(GiB)": 33.01, + "step": 3600, + "train_speed(iter/s)": 0.187499 + }, + { + "acc": 0.92133265, + "epoch": 1.1944996686547382, + "grad_norm": 3.463747502790954, + "learning_rate": 9.997414828135532e-06, + "loss": 0.37525055, + "memory(GiB)": 33.01, + "step": 3605, + "train_speed(iter/s)": 0.187506 + }, + { + "acc": 0.93312187, + "epoch": 1.196156394963552, + "grad_norm": 4.264407587151082, + "learning_rate": 9.997370604040587e-06, + "loss": 0.31689456, + "memory(GiB)": 33.01, + "step": 3610, + "train_speed(iter/s)": 0.187514 + }, + { + "acc": 0.93518686, + "epoch": 1.1978131212723657, + "grad_norm": 3.0239619820220542, + "learning_rate": 9.997326004979391e-06, + "loss": 0.33057671, + "memory(GiB)": 33.01, + "step": 3615, + "train_speed(iter/s)": 0.187522 + }, + { + "acc": 0.91941586, + "epoch": 1.1994698475811796, + "grad_norm": 2.587705719863201, + "learning_rate": 9.997281030955288e-06, + "loss": 0.36802444, + "memory(GiB)": 33.01, + "step": 3620, + "train_speed(iter/s)": 0.187529 + }, + { + "acc": 0.91424217, + "epoch": 1.2011265738899934, + "grad_norm": 4.536591989811321, + "learning_rate": 9.997235681971656e-06, + "loss": 0.40987997, + "memory(GiB)": 33.01, + "step": 3625, + "train_speed(iter/s)": 0.187536 + }, + { + "acc": 0.93441477, + "epoch": 1.202783300198807, + "grad_norm": 3.8329063732002293, + "learning_rate": 9.997189958031895e-06, + "loss": 0.31681237, + "memory(GiB)": 33.01, + "step": 3630, + "train_speed(iter/s)": 0.187543 + }, + { + "acc": 0.9128912, + "epoch": 1.204440026507621, + "grad_norm": 3.702790490407351, + "learning_rate": 9.997143859139439e-06, + "loss": 0.3997297, + "memory(GiB)": 33.01, + "step": 3635, + "train_speed(iter/s)": 0.18755 + }, + { + "acc": 0.91567707, + "epoch": 1.2060967528164348, + "grad_norm": 3.384491618727864, + "learning_rate": 9.997097385297744e-06, + "loss": 0.38490405, + "memory(GiB)": 33.01, + "step": 3640, + "train_speed(iter/s)": 0.187558 + }, + { + "acc": 0.93141842, + "epoch": 1.2077534791252484, + "grad_norm": 2.914997162442394, + "learning_rate": 9.9970505365103e-06, + "loss": 0.34473834, + "memory(GiB)": 33.01, + "step": 3645, + "train_speed(iter/s)": 0.187565 + }, + { + "acc": 0.92478352, + "epoch": 1.2094102054340623, + "grad_norm": 2.3528754980819504, + "learning_rate": 9.997003312780622e-06, + "loss": 0.35598297, + "memory(GiB)": 33.01, + "step": 3650, + "train_speed(iter/s)": 0.187573 + }, + { + "acc": 0.92494583, + "epoch": 1.2110669317428762, + "grad_norm": 6.933560996174331, + "learning_rate": 9.996955714112254e-06, + "loss": 0.35216231, + "memory(GiB)": 33.01, + "step": 3655, + "train_speed(iter/s)": 0.18758 + }, + { + "acc": 0.9253911, + "epoch": 1.2127236580516898, + "grad_norm": 3.5739708668035495, + "learning_rate": 9.996907740508767e-06, + "loss": 0.35801926, + "memory(GiB)": 33.01, + "step": 3660, + "train_speed(iter/s)": 0.187587 + }, + { + "acc": 0.92626677, + "epoch": 1.2143803843605037, + "grad_norm": 4.233605139073024, + "learning_rate": 9.996859391973761e-06, + "loss": 0.38392704, + "memory(GiB)": 33.01, + "step": 3665, + "train_speed(iter/s)": 0.187594 + }, + { + "acc": 0.92703028, + "epoch": 1.2160371106693175, + "grad_norm": 3.634820256417589, + "learning_rate": 9.996810668510865e-06, + "loss": 0.34094188, + "memory(GiB)": 33.01, + "step": 3670, + "train_speed(iter/s)": 0.187601 + }, + { + "acc": 0.93061657, + "epoch": 1.2176938369781312, + "grad_norm": 5.114858856367563, + "learning_rate": 9.996761570123734e-06, + "loss": 0.31726093, + "memory(GiB)": 33.01, + "step": 3675, + "train_speed(iter/s)": 0.187608 + }, + { + "acc": 0.92885113, + "epoch": 1.219350563286945, + "grad_norm": 4.652957243697469, + "learning_rate": 9.996712096816054e-06, + "loss": 0.32563906, + "memory(GiB)": 33.01, + "step": 3680, + "train_speed(iter/s)": 0.187616 + }, + { + "acc": 0.9350296, + "epoch": 1.2210072895957587, + "grad_norm": 3.5677819539479896, + "learning_rate": 9.996662248591534e-06, + "loss": 0.31505063, + "memory(GiB)": 33.01, + "step": 3685, + "train_speed(iter/s)": 0.187623 + }, + { + "acc": 0.92970486, + "epoch": 1.2226640159045725, + "grad_norm": 3.7798542926882677, + "learning_rate": 9.996612025453919e-06, + "loss": 0.34854329, + "memory(GiB)": 33.01, + "step": 3690, + "train_speed(iter/s)": 0.18763 + }, + { + "acc": 0.92640114, + "epoch": 1.2243207422133864, + "grad_norm": 3.2775733400719997, + "learning_rate": 9.996561427406974e-06, + "loss": 0.34672508, + "memory(GiB)": 33.01, + "step": 3695, + "train_speed(iter/s)": 0.187637 + }, + { + "acc": 0.92301512, + "epoch": 1.2259774685222, + "grad_norm": 3.7070009363339094, + "learning_rate": 9.996510454454502e-06, + "loss": 0.33321366, + "memory(GiB)": 33.01, + "step": 3700, + "train_speed(iter/s)": 0.187644 + }, + { + "acc": 0.91633167, + "epoch": 1.2276341948310139, + "grad_norm": 3.9089476682504403, + "learning_rate": 9.996459106600319e-06, + "loss": 0.36949565, + "memory(GiB)": 33.01, + "step": 3705, + "train_speed(iter/s)": 0.187651 + }, + { + "acc": 0.92749081, + "epoch": 1.2292909211398277, + "grad_norm": 2.383069253984934, + "learning_rate": 9.996407383848283e-06, + "loss": 0.36472716, + "memory(GiB)": 33.01, + "step": 3710, + "train_speed(iter/s)": 0.187658 + }, + { + "acc": 0.92329865, + "epoch": 1.2309476474486414, + "grad_norm": 2.355621651870094, + "learning_rate": 9.996355286202277e-06, + "loss": 0.33665237, + "memory(GiB)": 33.01, + "step": 3715, + "train_speed(iter/s)": 0.187665 + }, + { + "acc": 0.92338848, + "epoch": 1.2326043737574552, + "grad_norm": 2.982481024537641, + "learning_rate": 9.996302813666209e-06, + "loss": 0.35028746, + "memory(GiB)": 33.01, + "step": 3720, + "train_speed(iter/s)": 0.187672 + }, + { + "acc": 0.92760925, + "epoch": 1.234261100066269, + "grad_norm": 2.2523268776096006, + "learning_rate": 9.996249966244015e-06, + "loss": 0.30640817, + "memory(GiB)": 33.01, + "step": 3725, + "train_speed(iter/s)": 0.187679 + }, + { + "acc": 0.91822443, + "epoch": 1.2359178263750827, + "grad_norm": 4.214879626463359, + "learning_rate": 9.996196743939663e-06, + "loss": 0.36053338, + "memory(GiB)": 33.01, + "step": 3730, + "train_speed(iter/s)": 0.187686 + }, + { + "acc": 0.9352869, + "epoch": 1.2375745526838966, + "grad_norm": 2.1221716411892793, + "learning_rate": 9.996143146757144e-06, + "loss": 0.31079593, + "memory(GiB)": 33.01, + "step": 3735, + "train_speed(iter/s)": 0.187692 + }, + { + "acc": 0.93244896, + "epoch": 1.2392312789927105, + "grad_norm": 2.7792772570584483, + "learning_rate": 9.99608917470048e-06, + "loss": 0.31752284, + "memory(GiB)": 33.01, + "step": 3740, + "train_speed(iter/s)": 0.187699 + }, + { + "acc": 0.92920828, + "epoch": 1.240888005301524, + "grad_norm": 4.50665904388833, + "learning_rate": 9.996034827773726e-06, + "loss": 0.31363287, + "memory(GiB)": 33.01, + "step": 3745, + "train_speed(iter/s)": 0.187706 + }, + { + "acc": 0.93048716, + "epoch": 1.242544731610338, + "grad_norm": 5.150303884251782, + "learning_rate": 9.995980105980956e-06, + "loss": 0.32176299, + "memory(GiB)": 33.01, + "step": 3750, + "train_speed(iter/s)": 0.187713 + }, + { + "acc": 0.92537174, + "epoch": 1.2442014579191518, + "grad_norm": 3.820148408737309, + "learning_rate": 9.995925009326277e-06, + "loss": 0.35507479, + "memory(GiB)": 33.01, + "step": 3755, + "train_speed(iter/s)": 0.18772 + }, + { + "acc": 0.92827415, + "epoch": 1.2458581842279655, + "grad_norm": 3.1128400564399414, + "learning_rate": 9.995869537813824e-06, + "loss": 0.36116066, + "memory(GiB)": 33.01, + "step": 3760, + "train_speed(iter/s)": 0.187727 + }, + { + "acc": 0.92336235, + "epoch": 1.2475149105367793, + "grad_norm": 3.080411070133688, + "learning_rate": 9.995813691447756e-06, + "loss": 0.3824018, + "memory(GiB)": 33.01, + "step": 3765, + "train_speed(iter/s)": 0.187728 + }, + { + "acc": 0.92660742, + "epoch": 1.2491716368455932, + "grad_norm": 2.616635669642493, + "learning_rate": 9.995757470232271e-06, + "loss": 0.3477334, + "memory(GiB)": 33.01, + "step": 3770, + "train_speed(iter/s)": 0.187729 + }, + { + "acc": 0.9220067, + "epoch": 1.2508283631544068, + "grad_norm": 3.102245923679212, + "learning_rate": 9.995700874171583e-06, + "loss": 0.3722064, + "memory(GiB)": 33.01, + "step": 3775, + "train_speed(iter/s)": 0.187728 + }, + { + "acc": 0.92387257, + "epoch": 1.2524850894632207, + "grad_norm": 3.4960303388785436, + "learning_rate": 9.995643903269939e-06, + "loss": 0.34432602, + "memory(GiB)": 33.01, + "step": 3780, + "train_speed(iter/s)": 0.187732 + }, + { + "acc": 0.92673817, + "epoch": 1.2541418157720345, + "grad_norm": 3.726521024830549, + "learning_rate": 9.995586557531614e-06, + "loss": 0.35871067, + "memory(GiB)": 33.01, + "step": 3785, + "train_speed(iter/s)": 0.187739 + }, + { + "acc": 0.92667179, + "epoch": 1.2557985420808482, + "grad_norm": 4.925958542452472, + "learning_rate": 9.995528836960913e-06, + "loss": 0.32628684, + "memory(GiB)": 33.01, + "step": 3790, + "train_speed(iter/s)": 0.187746 + }, + { + "acc": 0.923773, + "epoch": 1.257455268389662, + "grad_norm": 2.7790224059485245, + "learning_rate": 9.995470741562167e-06, + "loss": 0.34259708, + "memory(GiB)": 33.01, + "step": 3795, + "train_speed(iter/s)": 0.187753 + }, + { + "acc": 0.92042322, + "epoch": 1.259111994698476, + "grad_norm": 4.687488642901812, + "learning_rate": 9.995412271339736e-06, + "loss": 0.36016665, + "memory(GiB)": 33.01, + "step": 3800, + "train_speed(iter/s)": 0.187761 + }, + { + "acc": 0.92206593, + "epoch": 1.2607687210072895, + "grad_norm": 2.4026412571271343, + "learning_rate": 9.995353426298003e-06, + "loss": 0.39115138, + "memory(GiB)": 33.01, + "step": 3805, + "train_speed(iter/s)": 0.187768 + }, + { + "acc": 0.91857586, + "epoch": 1.2624254473161034, + "grad_norm": 4.513103775057416, + "learning_rate": 9.995294206441392e-06, + "loss": 0.36755357, + "memory(GiB)": 33.01, + "step": 3810, + "train_speed(iter/s)": 0.187775 + }, + { + "acc": 0.92359676, + "epoch": 1.2640821736249173, + "grad_norm": 3.262137887759917, + "learning_rate": 9.995234611774339e-06, + "loss": 0.32638867, + "memory(GiB)": 33.01, + "step": 3815, + "train_speed(iter/s)": 0.187781 + }, + { + "acc": 0.92083435, + "epoch": 1.265738899933731, + "grad_norm": 3.1376984773270946, + "learning_rate": 9.995174642301319e-06, + "loss": 0.39106462, + "memory(GiB)": 33.01, + "step": 3820, + "train_speed(iter/s)": 0.187788 + }, + { + "acc": 0.9268259, + "epoch": 1.2673956262425448, + "grad_norm": 3.6070613653945194, + "learning_rate": 9.995114298026833e-06, + "loss": 0.35192406, + "memory(GiB)": 33.01, + "step": 3825, + "train_speed(iter/s)": 0.187795 + }, + { + "acc": 0.92944641, + "epoch": 1.2690523525513586, + "grad_norm": 2.3387275683835504, + "learning_rate": 9.995053578955408e-06, + "loss": 0.31027389, + "memory(GiB)": 33.01, + "step": 3830, + "train_speed(iter/s)": 0.187802 + }, + { + "acc": 0.91999998, + "epoch": 1.2707090788601723, + "grad_norm": 3.382579511439133, + "learning_rate": 9.994992485091603e-06, + "loss": 0.35692229, + "memory(GiB)": 33.01, + "step": 3835, + "train_speed(iter/s)": 0.187808 + }, + { + "acc": 0.92861013, + "epoch": 1.2723658051689861, + "grad_norm": 3.6141616570107207, + "learning_rate": 9.994931016439998e-06, + "loss": 0.36012015, + "memory(GiB)": 33.01, + "step": 3840, + "train_speed(iter/s)": 0.187815 + }, + { + "acc": 0.92661362, + "epoch": 1.2740225314778, + "grad_norm": 2.7014468390409982, + "learning_rate": 9.99486917300521e-06, + "loss": 0.35332563, + "memory(GiB)": 33.01, + "step": 3845, + "train_speed(iter/s)": 0.187821 + }, + { + "acc": 0.93278389, + "epoch": 1.2756792577866136, + "grad_norm": 2.9190583238133536, + "learning_rate": 9.994806954791877e-06, + "loss": 0.32674026, + "memory(GiB)": 33.01, + "step": 3850, + "train_speed(iter/s)": 0.187828 + }, + { + "acc": 0.92834053, + "epoch": 1.2773359840954275, + "grad_norm": 3.162888980927606, + "learning_rate": 9.99474436180467e-06, + "loss": 0.35694146, + "memory(GiB)": 33.01, + "step": 3855, + "train_speed(iter/s)": 0.187834 + }, + { + "acc": 0.93106079, + "epoch": 1.2789927104042413, + "grad_norm": 2.180292217336007, + "learning_rate": 9.994681394048283e-06, + "loss": 0.31116631, + "memory(GiB)": 33.01, + "step": 3860, + "train_speed(iter/s)": 0.18784 + }, + { + "acc": 0.92896137, + "epoch": 1.280649436713055, + "grad_norm": 3.725631564421506, + "learning_rate": 9.994618051527444e-06, + "loss": 0.32231021, + "memory(GiB)": 33.01, + "step": 3865, + "train_speed(iter/s)": 0.187847 + }, + { + "acc": 0.928269, + "epoch": 1.2823061630218688, + "grad_norm": 3.8065518382089776, + "learning_rate": 9.994554334246902e-06, + "loss": 0.30769591, + "memory(GiB)": 33.01, + "step": 3870, + "train_speed(iter/s)": 0.187853 + }, + { + "acc": 0.93014402, + "epoch": 1.2839628893306827, + "grad_norm": 3.280700777914299, + "learning_rate": 9.994490242211445e-06, + "loss": 0.33068173, + "memory(GiB)": 33.01, + "step": 3875, + "train_speed(iter/s)": 0.18786 + }, + { + "acc": 0.93122234, + "epoch": 1.2856196156394963, + "grad_norm": 2.765686712261379, + "learning_rate": 9.994425775425877e-06, + "loss": 0.35716617, + "memory(GiB)": 33.01, + "step": 3880, + "train_speed(iter/s)": 0.187866 + }, + { + "acc": 0.93585377, + "epoch": 1.2872763419483102, + "grad_norm": 4.219149782391032, + "learning_rate": 9.994360933895037e-06, + "loss": 0.29821897, + "memory(GiB)": 33.01, + "step": 3885, + "train_speed(iter/s)": 0.187873 + }, + { + "acc": 0.94056263, + "epoch": 1.288933068257124, + "grad_norm": 3.7700987238795864, + "learning_rate": 9.994295717623792e-06, + "loss": 0.30137362, + "memory(GiB)": 33.01, + "step": 3890, + "train_speed(iter/s)": 0.187879 + }, + { + "acc": 0.92737751, + "epoch": 1.2905897945659377, + "grad_norm": 3.636527628612316, + "learning_rate": 9.994230126617035e-06, + "loss": 0.2921406, + "memory(GiB)": 33.01, + "step": 3895, + "train_speed(iter/s)": 0.187886 + }, + { + "acc": 0.93736925, + "epoch": 1.2922465208747516, + "grad_norm": 3.6644248366985575, + "learning_rate": 9.99416416087969e-06, + "loss": 0.29372125, + "memory(GiB)": 33.01, + "step": 3900, + "train_speed(iter/s)": 0.187892 + }, + { + "acc": 0.93469362, + "epoch": 1.2939032471835652, + "grad_norm": 3.5477819583404697, + "learning_rate": 9.994097820416702e-06, + "loss": 0.30780063, + "memory(GiB)": 33.01, + "step": 3905, + "train_speed(iter/s)": 0.187898 + }, + { + "acc": 0.92783298, + "epoch": 1.295559973492379, + "grad_norm": 3.141078965900213, + "learning_rate": 9.994031105233052e-06, + "loss": 0.31574149, + "memory(GiB)": 33.01, + "step": 3910, + "train_speed(iter/s)": 0.187905 + }, + { + "acc": 0.93738155, + "epoch": 1.297216699801193, + "grad_norm": 2.962093841370019, + "learning_rate": 9.993964015333749e-06, + "loss": 0.29025574, + "memory(GiB)": 33.01, + "step": 3915, + "train_speed(iter/s)": 0.187911 + }, + { + "acc": 0.92963943, + "epoch": 1.2988734261100066, + "grad_norm": 3.450074619131517, + "learning_rate": 9.993896550723824e-06, + "loss": 0.32397094, + "memory(GiB)": 33.01, + "step": 3920, + "train_speed(iter/s)": 0.187917 + }, + { + "acc": 0.92763071, + "epoch": 1.3005301524188204, + "grad_norm": 2.275170733239321, + "learning_rate": 9.993828711408341e-06, + "loss": 0.31115437, + "memory(GiB)": 33.01, + "step": 3925, + "train_speed(iter/s)": 0.187924 + }, + { + "acc": 0.93752851, + "epoch": 1.302186878727634, + "grad_norm": 2.4918963458658965, + "learning_rate": 9.993760497392388e-06, + "loss": 0.31371839, + "memory(GiB)": 33.01, + "step": 3930, + "train_speed(iter/s)": 0.18793 + }, + { + "acc": 0.9316061, + "epoch": 1.303843605036448, + "grad_norm": 3.053424190449544, + "learning_rate": 9.993691908681089e-06, + "loss": 0.35731153, + "memory(GiB)": 33.01, + "step": 3935, + "train_speed(iter/s)": 0.187936 + }, + { + "acc": 0.93001118, + "epoch": 1.3055003313452618, + "grad_norm": 2.388009315776182, + "learning_rate": 9.993622945279588e-06, + "loss": 0.2991662, + "memory(GiB)": 33.01, + "step": 3940, + "train_speed(iter/s)": 0.187942 + }, + { + "acc": 0.93689032, + "epoch": 1.3071570576540754, + "grad_norm": 2.625762219536255, + "learning_rate": 9.99355360719306e-06, + "loss": 0.30675516, + "memory(GiB)": 33.01, + "step": 3945, + "train_speed(iter/s)": 0.187949 + }, + { + "acc": 0.94865704, + "epoch": 1.3088137839628893, + "grad_norm": 2.9206118345721466, + "learning_rate": 9.993483894426706e-06, + "loss": 0.29642808, + "memory(GiB)": 33.01, + "step": 3950, + "train_speed(iter/s)": 0.187955 + }, + { + "acc": 0.92296667, + "epoch": 1.3104705102717031, + "grad_norm": 3.3467743607062808, + "learning_rate": 9.99341380698576e-06, + "loss": 0.33743124, + "memory(GiB)": 33.01, + "step": 3955, + "train_speed(iter/s)": 0.187962 + }, + { + "acc": 0.93078327, + "epoch": 1.3121272365805168, + "grad_norm": 2.1518445462143565, + "learning_rate": 9.993343344875482e-06, + "loss": 0.30151925, + "memory(GiB)": 33.01, + "step": 3960, + "train_speed(iter/s)": 0.187967 + }, + { + "acc": 0.93073835, + "epoch": 1.3137839628893306, + "grad_norm": 3.1234988518274767, + "learning_rate": 9.993272508101156e-06, + "loss": 0.29494073, + "memory(GiB)": 33.01, + "step": 3965, + "train_speed(iter/s)": 0.187973 + }, + { + "acc": 0.94187469, + "epoch": 1.3154406891981445, + "grad_norm": 2.884147113438224, + "learning_rate": 9.993201296668103e-06, + "loss": 0.30527532, + "memory(GiB)": 33.01, + "step": 3970, + "train_speed(iter/s)": 0.18798 + }, + { + "acc": 0.93317776, + "epoch": 1.3170974155069581, + "grad_norm": 2.596396758677392, + "learning_rate": 9.993129710581662e-06, + "loss": 0.28667531, + "memory(GiB)": 33.01, + "step": 3975, + "train_speed(iter/s)": 0.187986 + }, + { + "acc": 0.93754797, + "epoch": 1.318754141815772, + "grad_norm": 4.301438589905388, + "learning_rate": 9.993057749847207e-06, + "loss": 0.32379344, + "memory(GiB)": 33.01, + "step": 3980, + "train_speed(iter/s)": 0.187993 + }, + { + "acc": 0.92824039, + "epoch": 1.3204108681245859, + "grad_norm": 3.140141491914004, + "learning_rate": 9.992985414470136e-06, + "loss": 0.33532877, + "memory(GiB)": 33.01, + "step": 3985, + "train_speed(iter/s)": 0.187998 + }, + { + "acc": 0.93718243, + "epoch": 1.3220675944333995, + "grad_norm": 3.5344457077050038, + "learning_rate": 9.992912704455879e-06, + "loss": 0.36814497, + "memory(GiB)": 33.01, + "step": 3990, + "train_speed(iter/s)": 0.188004 + }, + { + "acc": 0.93041048, + "epoch": 1.3237243207422134, + "grad_norm": 2.7729199897251084, + "learning_rate": 9.992839619809893e-06, + "loss": 0.31639609, + "memory(GiB)": 33.01, + "step": 3995, + "train_speed(iter/s)": 0.188002 + }, + { + "acc": 0.93354034, + "epoch": 1.3253810470510272, + "grad_norm": 3.449036303659263, + "learning_rate": 9.992766160537659e-06, + "loss": 0.3051719, + "memory(GiB)": 33.01, + "step": 4000, + "train_speed(iter/s)": 0.188 + }, + { + "acc": 0.93086681, + "epoch": 1.3270377733598409, + "grad_norm": 3.0296237355416524, + "learning_rate": 9.992692326644693e-06, + "loss": 0.33369646, + "memory(GiB)": 33.01, + "step": 4005, + "train_speed(iter/s)": 0.188 + }, + { + "acc": 0.9316391, + "epoch": 1.3286944996686547, + "grad_norm": 2.5657778008931222, + "learning_rate": 9.992618118136533e-06, + "loss": 0.28221853, + "memory(GiB)": 33.01, + "step": 4010, + "train_speed(iter/s)": 0.188006 + }, + { + "acc": 0.92925653, + "epoch": 1.3303512259774686, + "grad_norm": 4.649549083512319, + "learning_rate": 9.99254353501875e-06, + "loss": 0.30690947, + "memory(GiB)": 33.01, + "step": 4015, + "train_speed(iter/s)": 0.188012 + }, + { + "acc": 0.94538603, + "epoch": 1.3320079522862822, + "grad_norm": 3.626225624410193, + "learning_rate": 9.992468577296935e-06, + "loss": 0.27149005, + "memory(GiB)": 33.01, + "step": 4020, + "train_speed(iter/s)": 0.188019 + }, + { + "acc": 0.94822292, + "epoch": 1.333664678595096, + "grad_norm": 3.855866149357719, + "learning_rate": 9.992393244976721e-06, + "loss": 0.26162114, + "memory(GiB)": 33.01, + "step": 4025, + "train_speed(iter/s)": 0.188025 + }, + { + "acc": 0.92598343, + "epoch": 1.33532140490391, + "grad_norm": 3.7938424779242004, + "learning_rate": 9.992317538063755e-06, + "loss": 0.34145732, + "memory(GiB)": 33.01, + "step": 4030, + "train_speed(iter/s)": 0.188031 + }, + { + "acc": 0.92380657, + "epoch": 1.3369781312127236, + "grad_norm": 4.716646594593983, + "learning_rate": 9.992241456563721e-06, + "loss": 0.38014688, + "memory(GiB)": 33.01, + "step": 4035, + "train_speed(iter/s)": 0.188037 + }, + { + "acc": 0.93459358, + "epoch": 1.3386348575215374, + "grad_norm": 4.612683598207418, + "learning_rate": 9.992165000482328e-06, + "loss": 0.2748986, + "memory(GiB)": 33.01, + "step": 4040, + "train_speed(iter/s)": 0.188042 + }, + { + "acc": 0.93223648, + "epoch": 1.3402915838303513, + "grad_norm": 2.592110016351656, + "learning_rate": 9.992088169825311e-06, + "loss": 0.32904916, + "memory(GiB)": 33.01, + "step": 4045, + "train_speed(iter/s)": 0.188049 + }, + { + "acc": 0.93688803, + "epoch": 1.341948310139165, + "grad_norm": 4.644288092052374, + "learning_rate": 9.992010964598436e-06, + "loss": 0.31696992, + "memory(GiB)": 33.01, + "step": 4050, + "train_speed(iter/s)": 0.188055 + }, + { + "acc": 0.93946381, + "epoch": 1.3436050364479788, + "grad_norm": 4.344444643704393, + "learning_rate": 9.9919333848075e-06, + "loss": 0.31000085, + "memory(GiB)": 33.01, + "step": 4055, + "train_speed(iter/s)": 0.188061 + }, + { + "acc": 0.93221197, + "epoch": 1.3452617627567927, + "grad_norm": 5.532366915026156, + "learning_rate": 9.991855430458322e-06, + "loss": 0.28737183, + "memory(GiB)": 33.01, + "step": 4060, + "train_speed(iter/s)": 0.188067 + }, + { + "acc": 0.94824142, + "epoch": 1.3469184890656063, + "grad_norm": 3.4428498074966236, + "learning_rate": 9.99177710155675e-06, + "loss": 0.28464704, + "memory(GiB)": 33.01, + "step": 4065, + "train_speed(iter/s)": 0.188072 + }, + { + "acc": 0.93260765, + "epoch": 1.3485752153744202, + "grad_norm": 4.935276047615211, + "learning_rate": 9.991698398108664e-06, + "loss": 0.29902873, + "memory(GiB)": 33.01, + "step": 4070, + "train_speed(iter/s)": 0.188078 + }, + { + "acc": 0.94271441, + "epoch": 1.350231941683234, + "grad_norm": 3.9581108800078924, + "learning_rate": 9.99161932011997e-06, + "loss": 0.27665021, + "memory(GiB)": 33.01, + "step": 4075, + "train_speed(iter/s)": 0.188084 + }, + { + "acc": 0.9331852, + "epoch": 1.3518886679920477, + "grad_norm": 3.4147382524913255, + "learning_rate": 9.9915398675966e-06, + "loss": 0.30838275, + "memory(GiB)": 33.01, + "step": 4080, + "train_speed(iter/s)": 0.18809 + }, + { + "acc": 0.93401146, + "epoch": 1.3535453943008615, + "grad_norm": 2.9195817564260866, + "learning_rate": 9.991460040544518e-06, + "loss": 0.29754572, + "memory(GiB)": 33.01, + "step": 4085, + "train_speed(iter/s)": 0.188096 + }, + { + "acc": 0.9369833, + "epoch": 1.3552021206096754, + "grad_norm": 3.8261472684027593, + "learning_rate": 9.991379838969713e-06, + "loss": 0.27704296, + "memory(GiB)": 33.01, + "step": 4090, + "train_speed(iter/s)": 0.188102 + }, + { + "acc": 0.93432493, + "epoch": 1.356858846918489, + "grad_norm": 3.0204807759996575, + "learning_rate": 9.991299262878208e-06, + "loss": 0.32643697, + "memory(GiB)": 33.01, + "step": 4095, + "train_speed(iter/s)": 0.188108 + }, + { + "acc": 0.93136787, + "epoch": 1.3585155732273029, + "grad_norm": 2.228315321411614, + "learning_rate": 9.991218312276043e-06, + "loss": 0.29976664, + "memory(GiB)": 33.01, + "step": 4100, + "train_speed(iter/s)": 0.188114 + }, + { + "acc": 0.93832722, + "epoch": 1.3601722995361167, + "grad_norm": 3.2780926082447217, + "learning_rate": 9.991136987169295e-06, + "loss": 0.31551151, + "memory(GiB)": 33.01, + "step": 4105, + "train_speed(iter/s)": 0.18812 + }, + { + "acc": 0.92250309, + "epoch": 1.3618290258449304, + "grad_norm": 2.0967913603524915, + "learning_rate": 9.991055287564067e-06, + "loss": 0.36131248, + "memory(GiB)": 33.01, + "step": 4110, + "train_speed(iter/s)": 0.188126 + }, + { + "acc": 0.9196558, + "epoch": 1.3634857521537442, + "grad_norm": 3.04957877993772, + "learning_rate": 9.99097321346649e-06, + "loss": 0.35065665, + "memory(GiB)": 33.01, + "step": 4115, + "train_speed(iter/s)": 0.188132 + }, + { + "acc": 0.93561573, + "epoch": 1.365142478462558, + "grad_norm": 3.0874861183406823, + "learning_rate": 9.990890764882722e-06, + "loss": 0.30160902, + "memory(GiB)": 33.01, + "step": 4120, + "train_speed(iter/s)": 0.188138 + }, + { + "acc": 0.93084764, + "epoch": 1.3667992047713717, + "grad_norm": 4.396726628372327, + "learning_rate": 9.990807941818951e-06, + "loss": 0.31668425, + "memory(GiB)": 33.01, + "step": 4125, + "train_speed(iter/s)": 0.188143 + }, + { + "acc": 0.94341707, + "epoch": 1.3684559310801856, + "grad_norm": 1.9439284972306012, + "learning_rate": 9.990724744281391e-06, + "loss": 0.2826818, + "memory(GiB)": 33.01, + "step": 4130, + "train_speed(iter/s)": 0.188149 + }, + { + "acc": 0.93765316, + "epoch": 1.3701126573889995, + "grad_norm": 1.8999731884417959, + "learning_rate": 9.990641172276286e-06, + "loss": 0.30255091, + "memory(GiB)": 33.01, + "step": 4135, + "train_speed(iter/s)": 0.188155 + }, + { + "acc": 0.9380127, + "epoch": 1.371769383697813, + "grad_norm": 2.873444423424632, + "learning_rate": 9.990557225809906e-06, + "loss": 0.28472929, + "memory(GiB)": 33.01, + "step": 4140, + "train_speed(iter/s)": 0.188161 + }, + { + "acc": 0.94341812, + "epoch": 1.373426110006627, + "grad_norm": 2.7763226622387758, + "learning_rate": 9.990472904888552e-06, + "loss": 0.27308426, + "memory(GiB)": 33.01, + "step": 4145, + "train_speed(iter/s)": 0.188167 + }, + { + "acc": 0.94635715, + "epoch": 1.3750828363154408, + "grad_norm": 2.5908085822084534, + "learning_rate": 9.990388209518552e-06, + "loss": 0.26468968, + "memory(GiB)": 33.01, + "step": 4150, + "train_speed(iter/s)": 0.188172 + }, + { + "acc": 0.93955574, + "epoch": 1.3767395626242545, + "grad_norm": 4.805493021778142, + "learning_rate": 9.990303139706258e-06, + "loss": 0.29098492, + "memory(GiB)": 33.01, + "step": 4155, + "train_speed(iter/s)": 0.188178 + }, + { + "acc": 0.92813005, + "epoch": 1.3783962889330683, + "grad_norm": 3.9214892603691798, + "learning_rate": 9.990217695458058e-06, + "loss": 0.3426239, + "memory(GiB)": 33.01, + "step": 4160, + "train_speed(iter/s)": 0.188184 + }, + { + "acc": 0.91839867, + "epoch": 1.3800530152418822, + "grad_norm": 4.045315242690874, + "learning_rate": 9.99013187678036e-06, + "loss": 0.3696887, + "memory(GiB)": 33.01, + "step": 4165, + "train_speed(iter/s)": 0.188189 + }, + { + "acc": 0.9324028, + "epoch": 1.3817097415506958, + "grad_norm": 4.321959254473603, + "learning_rate": 9.990045683679605e-06, + "loss": 0.31572053, + "memory(GiB)": 33.01, + "step": 4170, + "train_speed(iter/s)": 0.188195 + }, + { + "acc": 0.92923403, + "epoch": 1.3833664678595097, + "grad_norm": 2.3322544989628042, + "learning_rate": 9.989959116162263e-06, + "loss": 0.33993003, + "memory(GiB)": 33.01, + "step": 4175, + "train_speed(iter/s)": 0.188201 + }, + { + "acc": 0.9367238, + "epoch": 1.3850231941683233, + "grad_norm": 2.451834543607949, + "learning_rate": 9.989872174234829e-06, + "loss": 0.2900305, + "memory(GiB)": 33.01, + "step": 4180, + "train_speed(iter/s)": 0.188206 + }, + { + "acc": 0.93373671, + "epoch": 1.3866799204771372, + "grad_norm": 2.745893558754339, + "learning_rate": 9.989784857903826e-06, + "loss": 0.30362544, + "memory(GiB)": 33.01, + "step": 4185, + "train_speed(iter/s)": 0.188211 + }, + { + "acc": 0.94090681, + "epoch": 1.388336646785951, + "grad_norm": 2.333242180787866, + "learning_rate": 9.989697167175807e-06, + "loss": 0.29308481, + "memory(GiB)": 33.01, + "step": 4190, + "train_speed(iter/s)": 0.188217 + }, + { + "acc": 0.93064957, + "epoch": 1.3899933730947647, + "grad_norm": 2.676326656969343, + "learning_rate": 9.989609102057351e-06, + "loss": 0.3038188, + "memory(GiB)": 33.01, + "step": 4195, + "train_speed(iter/s)": 0.188223 + }, + { + "acc": 0.93896952, + "epoch": 1.3916500994035785, + "grad_norm": 2.926888898906929, + "learning_rate": 9.98952066255507e-06, + "loss": 0.31832986, + "memory(GiB)": 33.01, + "step": 4200, + "train_speed(iter/s)": 0.188228 + }, + { + "acc": 0.93322506, + "epoch": 1.3933068257123924, + "grad_norm": 1.9729320822722771, + "learning_rate": 9.989431848675594e-06, + "loss": 0.31466131, + "memory(GiB)": 33.01, + "step": 4205, + "train_speed(iter/s)": 0.188234 + }, + { + "acc": 0.9312398, + "epoch": 1.394963552021206, + "grad_norm": 2.4255073560346796, + "learning_rate": 9.989342660425596e-06, + "loss": 0.31000106, + "memory(GiB)": 33.01, + "step": 4210, + "train_speed(iter/s)": 0.18824 + }, + { + "acc": 0.93309641, + "epoch": 1.39662027833002, + "grad_norm": 3.1566325185723803, + "learning_rate": 9.989253097811762e-06, + "loss": 0.2976181, + "memory(GiB)": 33.01, + "step": 4215, + "train_speed(iter/s)": 0.188245 + }, + { + "acc": 0.93946476, + "epoch": 1.3982770046388335, + "grad_norm": 2.476569856253078, + "learning_rate": 9.989163160840818e-06, + "loss": 0.28907919, + "memory(GiB)": 33.01, + "step": 4220, + "train_speed(iter/s)": 0.188248 + }, + { + "acc": 0.94837618, + "epoch": 1.3999337309476474, + "grad_norm": 2.911679720820383, + "learning_rate": 9.989072849519508e-06, + "loss": 0.27384791, + "memory(GiB)": 33.01, + "step": 4225, + "train_speed(iter/s)": 0.188248 + }, + { + "acc": 0.93348541, + "epoch": 1.4015904572564613, + "grad_norm": 3.441434835167615, + "learning_rate": 9.988982163854613e-06, + "loss": 0.28337784, + "memory(GiB)": 33.01, + "step": 4230, + "train_speed(iter/s)": 0.188243 + }, + { + "acc": 0.93422871, + "epoch": 1.403247183565275, + "grad_norm": 3.1333176002891996, + "learning_rate": 9.988891103852935e-06, + "loss": 0.28754125, + "memory(GiB)": 33.01, + "step": 4235, + "train_speed(iter/s)": 0.188248 + }, + { + "acc": 0.94224987, + "epoch": 1.4049039098740888, + "grad_norm": 3.86832950886018, + "learning_rate": 9.988799669521308e-06, + "loss": 0.28542409, + "memory(GiB)": 33.01, + "step": 4240, + "train_speed(iter/s)": 0.188254 + }, + { + "acc": 0.94306545, + "epoch": 1.4065606361829026, + "grad_norm": 2.243059423935416, + "learning_rate": 9.988707860866595e-06, + "loss": 0.26384854, + "memory(GiB)": 33.01, + "step": 4245, + "train_speed(iter/s)": 0.18826 + }, + { + "acc": 0.94372959, + "epoch": 1.4082173624917163, + "grad_norm": 2.4386320268099504, + "learning_rate": 9.988615677895683e-06, + "loss": 0.27046497, + "memory(GiB)": 33.01, + "step": 4250, + "train_speed(iter/s)": 0.188265 + }, + { + "acc": 0.94478703, + "epoch": 1.4098740888005301, + "grad_norm": 2.866006036158438, + "learning_rate": 9.98852312061549e-06, + "loss": 0.26427975, + "memory(GiB)": 33.01, + "step": 4255, + "train_speed(iter/s)": 0.18827 + }, + { + "acc": 0.94633064, + "epoch": 1.411530815109344, + "grad_norm": 2.996758179153441, + "learning_rate": 9.988430189032962e-06, + "loss": 0.26404467, + "memory(GiB)": 33.01, + "step": 4260, + "train_speed(iter/s)": 0.188276 + }, + { + "acc": 0.94439745, + "epoch": 1.4131875414181576, + "grad_norm": 3.431455227332249, + "learning_rate": 9.988336883155075e-06, + "loss": 0.27302637, + "memory(GiB)": 33.01, + "step": 4265, + "train_speed(iter/s)": 0.188281 + }, + { + "acc": 0.93996572, + "epoch": 1.4148442677269715, + "grad_norm": 3.5621623685509443, + "learning_rate": 9.988243202988825e-06, + "loss": 0.28133731, + "memory(GiB)": 33.01, + "step": 4270, + "train_speed(iter/s)": 0.188287 + }, + { + "acc": 0.93899097, + "epoch": 1.4165009940357853, + "grad_norm": 2.576958716798402, + "learning_rate": 9.988149148541247e-06, + "loss": 0.30188389, + "memory(GiB)": 33.01, + "step": 4275, + "train_speed(iter/s)": 0.188293 + }, + { + "acc": 0.93714771, + "epoch": 1.418157720344599, + "grad_norm": 3.6469486042808006, + "learning_rate": 9.988054719819396e-06, + "loss": 0.28270049, + "memory(GiB)": 33.01, + "step": 4280, + "train_speed(iter/s)": 0.188299 + }, + { + "acc": 0.93806877, + "epoch": 1.4198144466534128, + "grad_norm": 4.028564160520423, + "learning_rate": 9.987959916830359e-06, + "loss": 0.34861684, + "memory(GiB)": 33.01, + "step": 4285, + "train_speed(iter/s)": 0.188304 + }, + { + "acc": 0.93435154, + "epoch": 1.4214711729622267, + "grad_norm": 2.7449103891634503, + "learning_rate": 9.98786473958125e-06, + "loss": 0.2720546, + "memory(GiB)": 33.01, + "step": 4290, + "train_speed(iter/s)": 0.188309 + }, + { + "acc": 0.94607534, + "epoch": 1.4231278992710403, + "grad_norm": 3.7844318142821773, + "learning_rate": 9.987769188079212e-06, + "loss": 0.28037467, + "memory(GiB)": 33.01, + "step": 4295, + "train_speed(iter/s)": 0.188314 + }, + { + "acc": 0.93993568, + "epoch": 1.4247846255798542, + "grad_norm": 5.149616419195095, + "learning_rate": 9.987673262331412e-06, + "loss": 0.28110685, + "memory(GiB)": 33.01, + "step": 4300, + "train_speed(iter/s)": 0.18832 + }, + { + "acc": 0.93993053, + "epoch": 1.426441351888668, + "grad_norm": 4.1529337866853915, + "learning_rate": 9.987576962345053e-06, + "loss": 0.28562617, + "memory(GiB)": 33.01, + "step": 4305, + "train_speed(iter/s)": 0.188325 + }, + { + "acc": 0.95176363, + "epoch": 1.4280980781974817, + "grad_norm": 3.5657578833957273, + "learning_rate": 9.987480288127357e-06, + "loss": 0.23848631, + "memory(GiB)": 33.01, + "step": 4310, + "train_speed(iter/s)": 0.188331 + }, + { + "acc": 0.94450035, + "epoch": 1.4297548045062956, + "grad_norm": 3.189509654070219, + "learning_rate": 9.987383239685581e-06, + "loss": 0.26595705, + "memory(GiB)": 33.01, + "step": 4315, + "train_speed(iter/s)": 0.188337 + }, + { + "acc": 0.94286051, + "epoch": 1.4314115308151094, + "grad_norm": 2.614431557513013, + "learning_rate": 9.987285817027007e-06, + "loss": 0.2524662, + "memory(GiB)": 33.01, + "step": 4320, + "train_speed(iter/s)": 0.188342 + }, + { + "acc": 0.94530735, + "epoch": 1.433068257123923, + "grad_norm": 4.132661227795143, + "learning_rate": 9.987188020158944e-06, + "loss": 0.24585824, + "memory(GiB)": 33.01, + "step": 4325, + "train_speed(iter/s)": 0.188347 + }, + { + "acc": 0.93835888, + "epoch": 1.434724983432737, + "grad_norm": 4.110450519736193, + "learning_rate": 9.987089849088735e-06, + "loss": 0.28327467, + "memory(GiB)": 33.01, + "step": 4330, + "train_speed(iter/s)": 0.188353 + }, + { + "acc": 0.94487133, + "epoch": 1.4363817097415508, + "grad_norm": 3.046038884222029, + "learning_rate": 9.986991303823742e-06, + "loss": 0.25465138, + "memory(GiB)": 33.01, + "step": 4335, + "train_speed(iter/s)": 0.188358 + }, + { + "acc": 0.95449142, + "epoch": 1.4380384360503644, + "grad_norm": 3.1566223779421487, + "learning_rate": 9.986892384371363e-06, + "loss": 0.22105565, + "memory(GiB)": 33.01, + "step": 4340, + "train_speed(iter/s)": 0.188363 + }, + { + "acc": 0.94515934, + "epoch": 1.4396951623591783, + "grad_norm": 3.1217568742644777, + "learning_rate": 9.986793090739018e-06, + "loss": 0.25169601, + "memory(GiB)": 33.01, + "step": 4345, + "train_speed(iter/s)": 0.188369 + }, + { + "acc": 0.94277983, + "epoch": 1.4413518886679921, + "grad_norm": 3.596221110177539, + "learning_rate": 9.98669342293416e-06, + "loss": 0.2747798, + "memory(GiB)": 33.01, + "step": 4350, + "train_speed(iter/s)": 0.188374 + }, + { + "acc": 0.9469965, + "epoch": 1.4430086149768058, + "grad_norm": 2.9384089218959044, + "learning_rate": 9.986593380964269e-06, + "loss": 0.25603518, + "memory(GiB)": 33.01, + "step": 4355, + "train_speed(iter/s)": 0.188379 + }, + { + "acc": 0.94336195, + "epoch": 1.4446653412856196, + "grad_norm": 3.4666269364313886, + "learning_rate": 9.986492964836851e-06, + "loss": 0.2590342, + "memory(GiB)": 33.01, + "step": 4360, + "train_speed(iter/s)": 0.188384 + }, + { + "acc": 0.94749794, + "epoch": 1.4463220675944335, + "grad_norm": 2.4064228589733814, + "learning_rate": 9.986392174559441e-06, + "loss": 0.24870133, + "memory(GiB)": 33.01, + "step": 4365, + "train_speed(iter/s)": 0.188389 + }, + { + "acc": 0.93699341, + "epoch": 1.4479787939032471, + "grad_norm": 2.677875073448861, + "learning_rate": 9.986291010139601e-06, + "loss": 0.29232826, + "memory(GiB)": 33.01, + "step": 4370, + "train_speed(iter/s)": 0.188395 + }, + { + "acc": 0.93877039, + "epoch": 1.449635520212061, + "grad_norm": 2.8612888959465925, + "learning_rate": 9.986189471584927e-06, + "loss": 0.27531915, + "memory(GiB)": 33.01, + "step": 4375, + "train_speed(iter/s)": 0.1884 + }, + { + "acc": 0.9468504, + "epoch": 1.4512922465208749, + "grad_norm": 2.9523552229828667, + "learning_rate": 9.986087558903034e-06, + "loss": 0.24648032, + "memory(GiB)": 33.01, + "step": 4380, + "train_speed(iter/s)": 0.188405 + }, + { + "acc": 0.93813934, + "epoch": 1.4529489728296885, + "grad_norm": 3.0496526021997323, + "learning_rate": 9.985985272101572e-06, + "loss": 0.28014739, + "memory(GiB)": 33.01, + "step": 4385, + "train_speed(iter/s)": 0.18841 + }, + { + "acc": 0.94850588, + "epoch": 1.4546056991385024, + "grad_norm": 2.9243739816215273, + "learning_rate": 9.985882611188215e-06, + "loss": 0.2484787, + "memory(GiB)": 33.01, + "step": 4390, + "train_speed(iter/s)": 0.188416 + }, + { + "acc": 0.93145809, + "epoch": 1.4562624254473162, + "grad_norm": 2.603042420356731, + "learning_rate": 9.985779576170667e-06, + "loss": 0.30901597, + "memory(GiB)": 33.01, + "step": 4395, + "train_speed(iter/s)": 0.18842 + }, + { + "acc": 0.94351511, + "epoch": 1.4579191517561298, + "grad_norm": 2.289743784223293, + "learning_rate": 9.98567616705666e-06, + "loss": 0.26875901, + "memory(GiB)": 33.01, + "step": 4400, + "train_speed(iter/s)": 0.188425 + }, + { + "acc": 0.94040852, + "epoch": 1.4595758780649437, + "grad_norm": 4.596671413802325, + "learning_rate": 9.985572383853952e-06, + "loss": 0.2778924, + "memory(GiB)": 33.01, + "step": 4405, + "train_speed(iter/s)": 0.18843 + }, + { + "acc": 0.93312197, + "epoch": 1.4612326043737576, + "grad_norm": 3.280940498392058, + "learning_rate": 9.985468226570338e-06, + "loss": 0.30632141, + "memory(GiB)": 33.01, + "step": 4410, + "train_speed(iter/s)": 0.188435 + }, + { + "acc": 0.94229469, + "epoch": 1.4628893306825712, + "grad_norm": 2.5384946557496315, + "learning_rate": 9.985363695213625e-06, + "loss": 0.28237987, + "memory(GiB)": 33.01, + "step": 4415, + "train_speed(iter/s)": 0.18844 + }, + { + "acc": 0.9361887, + "epoch": 1.464546056991385, + "grad_norm": 2.810763212581721, + "learning_rate": 9.985258789791661e-06, + "loss": 0.29882345, + "memory(GiB)": 33.01, + "step": 4420, + "train_speed(iter/s)": 0.188445 + }, + { + "acc": 0.94827404, + "epoch": 1.466202783300199, + "grad_norm": 2.606906569992161, + "learning_rate": 9.98515351031232e-06, + "loss": 0.25767074, + "memory(GiB)": 33.01, + "step": 4425, + "train_speed(iter/s)": 0.18845 + }, + { + "acc": 0.94562292, + "epoch": 1.4678595096090126, + "grad_norm": 4.317781725106929, + "learning_rate": 9.9850478567835e-06, + "loss": 0.25841742, + "memory(GiB)": 33.01, + "step": 4430, + "train_speed(iter/s)": 0.188456 + }, + { + "acc": 0.94369373, + "epoch": 1.4695162359178264, + "grad_norm": 3.1070989538697598, + "learning_rate": 9.984941829213128e-06, + "loss": 0.26027231, + "memory(GiB)": 33.01, + "step": 4435, + "train_speed(iter/s)": 0.18846 + }, + { + "acc": 0.93526039, + "epoch": 1.4711729622266403, + "grad_norm": 3.0051770344683617, + "learning_rate": 9.984835427609161e-06, + "loss": 0.32055621, + "memory(GiB)": 33.01, + "step": 4440, + "train_speed(iter/s)": 0.188466 + }, + { + "acc": 0.94421978, + "epoch": 1.472829688535454, + "grad_norm": 3.3700092652183886, + "learning_rate": 9.984728651979587e-06, + "loss": 0.26462595, + "memory(GiB)": 33.01, + "step": 4445, + "train_speed(iter/s)": 0.188471 + }, + { + "acc": 0.94748573, + "epoch": 1.4744864148442678, + "grad_norm": 2.433854802806776, + "learning_rate": 9.984621502332415e-06, + "loss": 0.22584906, + "memory(GiB)": 33.01, + "step": 4450, + "train_speed(iter/s)": 0.188468 + }, + { + "acc": 0.93821697, + "epoch": 1.4761431411530814, + "grad_norm": 3.3424554831160886, + "learning_rate": 9.984513978675685e-06, + "loss": 0.2816637, + "memory(GiB)": 33.01, + "step": 4455, + "train_speed(iter/s)": 0.188473 + }, + { + "acc": 0.94929428, + "epoch": 1.4777998674618953, + "grad_norm": 2.2572600206045546, + "learning_rate": 9.984406081017468e-06, + "loss": 0.23356147, + "memory(GiB)": 33.01, + "step": 4460, + "train_speed(iter/s)": 0.188469 + }, + { + "acc": 0.94652576, + "epoch": 1.4794565937707091, + "grad_norm": 3.575363670522039, + "learning_rate": 9.984297809365858e-06, + "loss": 0.24346108, + "memory(GiB)": 33.01, + "step": 4465, + "train_speed(iter/s)": 0.188474 + }, + { + "acc": 0.9413991, + "epoch": 1.4811133200795228, + "grad_norm": 3.0839640458880244, + "learning_rate": 9.984189163728982e-06, + "loss": 0.27495437, + "memory(GiB)": 33.01, + "step": 4470, + "train_speed(iter/s)": 0.188479 + }, + { + "acc": 0.93963642, + "epoch": 1.4827700463883366, + "grad_norm": 3.111673666441867, + "learning_rate": 9.984080144114992e-06, + "loss": 0.25605507, + "memory(GiB)": 33.01, + "step": 4475, + "train_speed(iter/s)": 0.188484 + }, + { + "acc": 0.93898993, + "epoch": 1.4844267726971505, + "grad_norm": 3.0321815813549273, + "learning_rate": 9.98397075053207e-06, + "loss": 0.30646503, + "memory(GiB)": 33.01, + "step": 4480, + "train_speed(iter/s)": 0.188489 + }, + { + "acc": 0.95032883, + "epoch": 1.4860834990059641, + "grad_norm": 2.156852537631904, + "learning_rate": 9.983860982988422e-06, + "loss": 0.22426569, + "memory(GiB)": 33.01, + "step": 4485, + "train_speed(iter/s)": 0.188494 + }, + { + "acc": 0.94717827, + "epoch": 1.487740225314778, + "grad_norm": 2.5987339576925934, + "learning_rate": 9.983750841492289e-06, + "loss": 0.25280399, + "memory(GiB)": 33.01, + "step": 4490, + "train_speed(iter/s)": 0.188499 + }, + { + "acc": 0.93444853, + "epoch": 1.4893969516235916, + "grad_norm": 3.8845195701504243, + "learning_rate": 9.983640326051933e-06, + "loss": 0.31441751, + "memory(GiB)": 33.01, + "step": 4495, + "train_speed(iter/s)": 0.188504 + }, + { + "acc": 0.94258862, + "epoch": 1.4910536779324055, + "grad_norm": 3.844360759369666, + "learning_rate": 9.983529436675647e-06, + "loss": 0.26622539, + "memory(GiB)": 33.01, + "step": 4500, + "train_speed(iter/s)": 0.188509 + }, + { + "acc": 0.94434681, + "epoch": 1.4927104042412194, + "grad_norm": 2.918815300404156, + "learning_rate": 9.983418173371753e-06, + "loss": 0.31492476, + "memory(GiB)": 33.01, + "step": 4505, + "train_speed(iter/s)": 0.188514 + }, + { + "acc": 0.96208019, + "epoch": 1.494367130550033, + "grad_norm": 2.5202193462964795, + "learning_rate": 9.9833065361486e-06, + "loss": 0.21308544, + "memory(GiB)": 33.01, + "step": 4510, + "train_speed(iter/s)": 0.188519 + }, + { + "acc": 0.94232941, + "epoch": 1.4960238568588469, + "grad_norm": 3.166034806246495, + "learning_rate": 9.983194525014566e-06, + "loss": 0.2875102, + "memory(GiB)": 33.01, + "step": 4515, + "train_speed(iter/s)": 0.188523 + }, + { + "acc": 0.92990093, + "epoch": 1.4976805831676607, + "grad_norm": 5.907539194923817, + "learning_rate": 9.983082139978058e-06, + "loss": 0.32663989, + "memory(GiB)": 33.01, + "step": 4520, + "train_speed(iter/s)": 0.188528 + }, + { + "acc": 0.94061928, + "epoch": 1.4993373094764744, + "grad_norm": 3.8378386950851673, + "learning_rate": 9.982969381047506e-06, + "loss": 0.27804546, + "memory(GiB)": 33.01, + "step": 4525, + "train_speed(iter/s)": 0.188533 + }, + { + "acc": 0.9455164, + "epoch": 1.5009940357852882, + "grad_norm": 2.4790047190754887, + "learning_rate": 9.982856248231372e-06, + "loss": 0.26023202, + "memory(GiB)": 33.01, + "step": 4530, + "train_speed(iter/s)": 0.188538 + }, + { + "acc": 0.94345798, + "epoch": 1.502650762094102, + "grad_norm": 3.0573407951814806, + "learning_rate": 9.982742741538146e-06, + "loss": 0.2570219, + "memory(GiB)": 33.01, + "step": 4535, + "train_speed(iter/s)": 0.188543 + }, + { + "acc": 0.94834957, + "epoch": 1.5043074884029157, + "grad_norm": 4.104025784557431, + "learning_rate": 9.982628860976346e-06, + "loss": 0.25655692, + "memory(GiB)": 33.01, + "step": 4540, + "train_speed(iter/s)": 0.188548 + }, + { + "acc": 0.95639915, + "epoch": 1.5059642147117296, + "grad_norm": 3.93063717312741, + "learning_rate": 9.982514606554518e-06, + "loss": 0.22867155, + "memory(GiB)": 33.01, + "step": 4545, + "train_speed(iter/s)": 0.188553 + }, + { + "acc": 0.95113354, + "epoch": 1.5076209410205434, + "grad_norm": 20.94903292322898, + "learning_rate": 9.982399978281236e-06, + "loss": 0.27273102, + "memory(GiB)": 33.01, + "step": 4550, + "train_speed(iter/s)": 0.188558 + }, + { + "acc": 0.95819855, + "epoch": 1.509277667329357, + "grad_norm": 3.4387520189083847, + "learning_rate": 9.9822849761651e-06, + "loss": 0.23380876, + "memory(GiB)": 33.01, + "step": 4555, + "train_speed(iter/s)": 0.188563 + }, + { + "acc": 0.95431976, + "epoch": 1.510934393638171, + "grad_norm": 3.562322457355702, + "learning_rate": 9.98216960021474e-06, + "loss": 0.20716219, + "memory(GiB)": 33.01, + "step": 4560, + "train_speed(iter/s)": 0.188568 + }, + { + "acc": 0.95186443, + "epoch": 1.5125911199469848, + "grad_norm": 3.680971429469887, + "learning_rate": 9.982053850438816e-06, + "loss": 0.22953637, + "memory(GiB)": 33.01, + "step": 4565, + "train_speed(iter/s)": 0.188573 + }, + { + "acc": 0.95531559, + "epoch": 1.5142478462557984, + "grad_norm": 4.100797831635736, + "learning_rate": 9.981937726846012e-06, + "loss": 0.22091107, + "memory(GiB)": 33.01, + "step": 4570, + "train_speed(iter/s)": 0.188578 + }, + { + "acc": 0.94186716, + "epoch": 1.5159045725646123, + "grad_norm": 3.3223197471269157, + "learning_rate": 9.981821229445041e-06, + "loss": 0.26601729, + "memory(GiB)": 33.01, + "step": 4575, + "train_speed(iter/s)": 0.188583 + }, + { + "acc": 0.93858147, + "epoch": 1.5175612988734262, + "grad_norm": 3.3266660382792055, + "learning_rate": 9.981704358244647e-06, + "loss": 0.28532922, + "memory(GiB)": 33.01, + "step": 4580, + "train_speed(iter/s)": 0.188588 + }, + { + "acc": 0.94062061, + "epoch": 1.5192180251822398, + "grad_norm": 2.8894966939472817, + "learning_rate": 9.9815871132536e-06, + "loss": 0.29712996, + "memory(GiB)": 33.01, + "step": 4585, + "train_speed(iter/s)": 0.188593 + }, + { + "acc": 0.94864483, + "epoch": 1.5208747514910537, + "grad_norm": 2.538374198617406, + "learning_rate": 9.981469494480699e-06, + "loss": 0.23360934, + "memory(GiB)": 33.01, + "step": 4590, + "train_speed(iter/s)": 0.188597 + }, + { + "acc": 0.93387566, + "epoch": 1.5225314777998675, + "grad_norm": 3.4944236423447506, + "learning_rate": 9.981351501934764e-06, + "loss": 0.31100078, + "memory(GiB)": 33.01, + "step": 4595, + "train_speed(iter/s)": 0.188602 + }, + { + "acc": 0.95249996, + "epoch": 1.5241882041086812, + "grad_norm": 3.2114280847773164, + "learning_rate": 9.981233135624658e-06, + "loss": 0.2311841, + "memory(GiB)": 33.01, + "step": 4600, + "train_speed(iter/s)": 0.188607 + }, + { + "acc": 0.94958639, + "epoch": 1.525844930417495, + "grad_norm": 2.3210712199238377, + "learning_rate": 9.981114395559257e-06, + "loss": 0.24990735, + "memory(GiB)": 33.01, + "step": 4605, + "train_speed(iter/s)": 0.188611 + }, + { + "acc": 0.95304022, + "epoch": 1.5275016567263089, + "grad_norm": 2.128002424742902, + "learning_rate": 9.980995281747473e-06, + "loss": 0.223489, + "memory(GiB)": 33.01, + "step": 4610, + "train_speed(iter/s)": 0.188617 + }, + { + "acc": 0.94253674, + "epoch": 1.5291583830351225, + "grad_norm": 2.872106254199602, + "learning_rate": 9.980875794198245e-06, + "loss": 0.28155315, + "memory(GiB)": 33.01, + "step": 4615, + "train_speed(iter/s)": 0.188621 + }, + { + "acc": 0.94812603, + "epoch": 1.5308151093439364, + "grad_norm": 3.301666913276626, + "learning_rate": 9.98075593292054e-06, + "loss": 0.24484925, + "memory(GiB)": 33.01, + "step": 4620, + "train_speed(iter/s)": 0.188626 + }, + { + "acc": 0.94433002, + "epoch": 1.5324718356527502, + "grad_norm": 3.023955619792361, + "learning_rate": 9.98063569792335e-06, + "loss": 0.27217543, + "memory(GiB)": 33.01, + "step": 4625, + "train_speed(iter/s)": 0.188631 + }, + { + "acc": 0.94974155, + "epoch": 1.5341285619615639, + "grad_norm": 3.3458366768646193, + "learning_rate": 9.9805150892157e-06, + "loss": 0.25424268, + "memory(GiB)": 33.01, + "step": 4630, + "train_speed(iter/s)": 0.188635 + }, + { + "acc": 0.9459053, + "epoch": 1.5357852882703777, + "grad_norm": 2.2689321431155234, + "learning_rate": 9.980394106806639e-06, + "loss": 0.25565271, + "memory(GiB)": 33.01, + "step": 4635, + "train_speed(iter/s)": 0.188639 + }, + { + "acc": 0.93355293, + "epoch": 1.5374420145791916, + "grad_norm": 2.7753707714720517, + "learning_rate": 9.980272750705245e-06, + "loss": 0.28458288, + "memory(GiB)": 33.01, + "step": 4640, + "train_speed(iter/s)": 0.188644 + }, + { + "acc": 0.9476614, + "epoch": 1.5390987408880052, + "grad_norm": 2.3229287601639133, + "learning_rate": 9.980151020920627e-06, + "loss": 0.25084689, + "memory(GiB)": 33.01, + "step": 4645, + "train_speed(iter/s)": 0.188649 + }, + { + "acc": 0.95184021, + "epoch": 1.540755467196819, + "grad_norm": 6.83150151463161, + "learning_rate": 9.980028917461918e-06, + "loss": 0.24106314, + "memory(GiB)": 33.01, + "step": 4650, + "train_speed(iter/s)": 0.188653 + }, + { + "acc": 0.9499938, + "epoch": 1.542412193505633, + "grad_norm": 4.508910679339447, + "learning_rate": 9.979906440338279e-06, + "loss": 0.2259788, + "memory(GiB)": 33.01, + "step": 4655, + "train_speed(iter/s)": 0.188658 + }, + { + "acc": 0.94400835, + "epoch": 1.5440689198144466, + "grad_norm": 4.486912101753494, + "learning_rate": 9.979783589558905e-06, + "loss": 0.2795326, + "memory(GiB)": 33.01, + "step": 4660, + "train_speed(iter/s)": 0.188662 + }, + { + "acc": 0.93964567, + "epoch": 1.5457256461232605, + "grad_norm": 4.1657848852788355, + "learning_rate": 9.979660365133011e-06, + "loss": 0.26774707, + "memory(GiB)": 33.01, + "step": 4665, + "train_speed(iter/s)": 0.188666 + }, + { + "acc": 0.94199753, + "epoch": 1.5473823724320743, + "grad_norm": 2.7648572377320053, + "learning_rate": 9.979536767069845e-06, + "loss": 0.25348239, + "memory(GiB)": 33.01, + "step": 4670, + "train_speed(iter/s)": 0.188671 + }, + { + "acc": 0.95243158, + "epoch": 1.549039098740888, + "grad_norm": 2.8747702505039254, + "learning_rate": 9.979412795378682e-06, + "loss": 0.220208, + "memory(GiB)": 33.01, + "step": 4675, + "train_speed(iter/s)": 0.188671 + }, + { + "acc": 0.94590988, + "epoch": 1.5506958250497018, + "grad_norm": 5.076808549466851, + "learning_rate": 9.979288450068824e-06, + "loss": 0.23557816, + "memory(GiB)": 33.01, + "step": 4680, + "train_speed(iter/s)": 0.188672 + }, + { + "acc": 0.93664322, + "epoch": 1.5523525513585157, + "grad_norm": 4.425463261157371, + "learning_rate": 9.979163731149604e-06, + "loss": 0.31336203, + "memory(GiB)": 33.01, + "step": 4685, + "train_speed(iter/s)": 0.188668 + }, + { + "acc": 0.9431179, + "epoch": 1.5540092776673293, + "grad_norm": 3.126294777396309, + "learning_rate": 9.979038638630377e-06, + "loss": 0.294207, + "memory(GiB)": 33.01, + "step": 4690, + "train_speed(iter/s)": 0.188673 + }, + { + "acc": 0.95931683, + "epoch": 1.5556660039761432, + "grad_norm": 2.141774558741648, + "learning_rate": 9.978913172520536e-06, + "loss": 0.21437566, + "memory(GiB)": 33.01, + "step": 4695, + "train_speed(iter/s)": 0.188678 + }, + { + "acc": 0.94850693, + "epoch": 1.557322730284957, + "grad_norm": 2.2059357337336234, + "learning_rate": 9.97878733282949e-06, + "loss": 0.28407876, + "memory(GiB)": 33.01, + "step": 4700, + "train_speed(iter/s)": 0.188683 + }, + { + "acc": 0.95758886, + "epoch": 1.5589794565937707, + "grad_norm": 2.4757581341795243, + "learning_rate": 9.978661119566686e-06, + "loss": 0.19829302, + "memory(GiB)": 33.01, + "step": 4705, + "train_speed(iter/s)": 0.188687 + }, + { + "acc": 0.95511742, + "epoch": 1.5606361829025845, + "grad_norm": 3.0990654451934745, + "learning_rate": 9.978534532741593e-06, + "loss": 0.23894968, + "memory(GiB)": 33.01, + "step": 4710, + "train_speed(iter/s)": 0.188692 + }, + { + "acc": 0.9526618, + "epoch": 1.5622929092113984, + "grad_norm": 3.657655528211022, + "learning_rate": 9.97840757236371e-06, + "loss": 0.24336443, + "memory(GiB)": 33.01, + "step": 4715, + "train_speed(iter/s)": 0.188697 + }, + { + "acc": 0.94776039, + "epoch": 1.563949635520212, + "grad_norm": 5.621035773137675, + "learning_rate": 9.978280238442567e-06, + "loss": 0.23600438, + "memory(GiB)": 33.01, + "step": 4720, + "train_speed(iter/s)": 0.188701 + }, + { + "acc": 0.94383068, + "epoch": 1.5656063618290257, + "grad_norm": 4.32774043670886, + "learning_rate": 9.978152530987715e-06, + "loss": 0.24449029, + "memory(GiB)": 33.01, + "step": 4725, + "train_speed(iter/s)": 0.188706 + }, + { + "acc": 0.94421568, + "epoch": 1.5672630881378398, + "grad_norm": 3.33296997831563, + "learning_rate": 9.97802445000874e-06, + "loss": 0.25713592, + "memory(GiB)": 33.01, + "step": 4730, + "train_speed(iter/s)": 0.18871 + }, + { + "acc": 0.95428514, + "epoch": 1.5689198144466534, + "grad_norm": 1.6532099651446057, + "learning_rate": 9.977895995515252e-06, + "loss": 0.21798651, + "memory(GiB)": 33.01, + "step": 4735, + "train_speed(iter/s)": 0.188714 + }, + { + "acc": 0.9493392, + "epoch": 1.570576540755467, + "grad_norm": 3.740700380404861, + "learning_rate": 9.977767167516892e-06, + "loss": 0.23871698, + "memory(GiB)": 33.01, + "step": 4740, + "train_speed(iter/s)": 0.188719 + }, + { + "acc": 0.94671459, + "epoch": 1.5722332670642811, + "grad_norm": 3.368393810253313, + "learning_rate": 9.977637966023325e-06, + "loss": 0.23216662, + "memory(GiB)": 33.01, + "step": 4745, + "train_speed(iter/s)": 0.188723 + }, + { + "acc": 0.95063591, + "epoch": 1.5738899933730948, + "grad_norm": 3.0993943871932976, + "learning_rate": 9.977508391044248e-06, + "loss": 0.26406002, + "memory(GiB)": 33.01, + "step": 4750, + "train_speed(iter/s)": 0.188728 + }, + { + "acc": 0.94728756, + "epoch": 1.5755467196819084, + "grad_norm": 3.3800826559542623, + "learning_rate": 9.977378442589384e-06, + "loss": 0.2560663, + "memory(GiB)": 33.01, + "step": 4755, + "train_speed(iter/s)": 0.188732 + }, + { + "acc": 0.94631729, + "epoch": 1.5772034459907225, + "grad_norm": 3.327726538205645, + "learning_rate": 9.977248120668483e-06, + "loss": 0.24505229, + "memory(GiB)": 33.01, + "step": 4760, + "train_speed(iter/s)": 0.188737 + }, + { + "acc": 0.9512085, + "epoch": 1.5788601722995361, + "grad_norm": 4.054660356081657, + "learning_rate": 9.977117425291326e-06, + "loss": 0.25066209, + "memory(GiB)": 33.01, + "step": 4765, + "train_speed(iter/s)": 0.188742 + }, + { + "acc": 0.94238873, + "epoch": 1.5805168986083498, + "grad_norm": 2.68219292620574, + "learning_rate": 9.97698635646772e-06, + "loss": 0.24627228, + "memory(GiB)": 33.01, + "step": 4770, + "train_speed(iter/s)": 0.188746 + }, + { + "acc": 0.9424408, + "epoch": 1.5821736249171638, + "grad_norm": 3.6554527332658666, + "learning_rate": 9.976854914207498e-06, + "loss": 0.27461429, + "memory(GiB)": 33.01, + "step": 4775, + "train_speed(iter/s)": 0.18875 + }, + { + "acc": 0.9430851, + "epoch": 1.5838303512259775, + "grad_norm": 3.8964501710851858, + "learning_rate": 9.976723098520528e-06, + "loss": 0.26223986, + "memory(GiB)": 33.01, + "step": 4780, + "train_speed(iter/s)": 0.188755 + }, + { + "acc": 0.94343033, + "epoch": 1.5854870775347911, + "grad_norm": 3.369986079845925, + "learning_rate": 9.976590909416698e-06, + "loss": 0.26183143, + "memory(GiB)": 33.01, + "step": 4785, + "train_speed(iter/s)": 0.18876 + }, + { + "acc": 0.96031656, + "epoch": 1.587143803843605, + "grad_norm": 2.380405076752628, + "learning_rate": 9.97645834690593e-06, + "loss": 0.19646028, + "memory(GiB)": 33.01, + "step": 4790, + "train_speed(iter/s)": 0.188764 + }, + { + "acc": 0.95498562, + "epoch": 1.5888005301524188, + "grad_norm": 4.297364822205282, + "learning_rate": 9.976325410998168e-06, + "loss": 0.22078636, + "memory(GiB)": 33.01, + "step": 4795, + "train_speed(iter/s)": 0.188768 + }, + { + "acc": 0.94994173, + "epoch": 1.5904572564612325, + "grad_norm": 2.547088024508513, + "learning_rate": 9.976192101703393e-06, + "loss": 0.23789809, + "memory(GiB)": 33.01, + "step": 4800, + "train_speed(iter/s)": 0.188773 + }, + { + "acc": 0.95071087, + "epoch": 1.5921139827700463, + "grad_norm": 2.895434807469631, + "learning_rate": 9.976058419031603e-06, + "loss": 0.24055889, + "memory(GiB)": 33.01, + "step": 4805, + "train_speed(iter/s)": 0.188778 + }, + { + "acc": 0.94927464, + "epoch": 1.5937707090788602, + "grad_norm": 2.7022893151934295, + "learning_rate": 9.975924362992832e-06, + "loss": 0.240098, + "memory(GiB)": 33.01, + "step": 4810, + "train_speed(iter/s)": 0.188782 + }, + { + "acc": 0.94955883, + "epoch": 1.5954274353876738, + "grad_norm": 3.098667505578039, + "learning_rate": 9.975789933597138e-06, + "loss": 0.29886706, + "memory(GiB)": 33.01, + "step": 4815, + "train_speed(iter/s)": 0.188786 + }, + { + "acc": 0.9490921, + "epoch": 1.5970841616964877, + "grad_norm": 3.1763937338831485, + "learning_rate": 9.975655130854612e-06, + "loss": 0.25676432, + "memory(GiB)": 33.01, + "step": 4820, + "train_speed(iter/s)": 0.188791 + }, + { + "acc": 0.94501534, + "epoch": 1.5987408880053016, + "grad_norm": 3.3037021375603874, + "learning_rate": 9.975519954775369e-06, + "loss": 0.26909356, + "memory(GiB)": 33.01, + "step": 4825, + "train_speed(iter/s)": 0.188795 + }, + { + "acc": 0.93495655, + "epoch": 1.6003976143141152, + "grad_norm": 5.088113469771529, + "learning_rate": 9.975384405369549e-06, + "loss": 0.27787559, + "memory(GiB)": 33.01, + "step": 4830, + "train_speed(iter/s)": 0.188799 + }, + { + "acc": 0.94772968, + "epoch": 1.602054340622929, + "grad_norm": 4.514764624259456, + "learning_rate": 9.975248482647327e-06, + "loss": 0.23549538, + "memory(GiB)": 33.01, + "step": 4835, + "train_speed(iter/s)": 0.188804 + }, + { + "acc": 0.9459712, + "epoch": 1.603711066931743, + "grad_norm": 2.877098089780718, + "learning_rate": 9.975112186618902e-06, + "loss": 0.27602563, + "memory(GiB)": 33.01, + "step": 4840, + "train_speed(iter/s)": 0.188808 + }, + { + "acc": 0.96013279, + "epoch": 1.6053677932405566, + "grad_norm": 2.974592013638567, + "learning_rate": 9.9749755172945e-06, + "loss": 0.2202702, + "memory(GiB)": 33.01, + "step": 4845, + "train_speed(iter/s)": 0.188812 + }, + { + "acc": 0.96109982, + "epoch": 1.6070245195493704, + "grad_norm": 3.0350816349287246, + "learning_rate": 9.974838474684379e-06, + "loss": 0.1902697, + "memory(GiB)": 33.01, + "step": 4850, + "train_speed(iter/s)": 0.188816 + }, + { + "acc": 0.95083942, + "epoch": 1.6086812458581843, + "grad_norm": 6.679294419395035, + "learning_rate": 9.974701058798822e-06, + "loss": 0.23198597, + "memory(GiB)": 33.01, + "step": 4855, + "train_speed(iter/s)": 0.18882 + }, + { + "acc": 0.94768581, + "epoch": 1.610337972166998, + "grad_norm": 3.9727156126425944, + "learning_rate": 9.97456326964814e-06, + "loss": 0.23015182, + "memory(GiB)": 33.01, + "step": 4860, + "train_speed(iter/s)": 0.188824 + }, + { + "acc": 0.95107021, + "epoch": 1.6119946984758118, + "grad_norm": 4.535114055899051, + "learning_rate": 9.974425107242673e-06, + "loss": 0.23106141, + "memory(GiB)": 33.01, + "step": 4865, + "train_speed(iter/s)": 0.188829 + }, + { + "acc": 0.9494894, + "epoch": 1.6136514247846256, + "grad_norm": 3.870704983276642, + "learning_rate": 9.974286571592788e-06, + "loss": 0.26235733, + "memory(GiB)": 33.01, + "step": 4870, + "train_speed(iter/s)": 0.188833 + }, + { + "acc": 0.94805145, + "epoch": 1.6153081510934393, + "grad_norm": 3.3742441210245797, + "learning_rate": 9.974147662708883e-06, + "loss": 0.21995747, + "memory(GiB)": 33.01, + "step": 4875, + "train_speed(iter/s)": 0.188837 + }, + { + "acc": 0.94874687, + "epoch": 1.6169648774022531, + "grad_norm": 3.401021821485584, + "learning_rate": 9.974008380601382e-06, + "loss": 0.24643075, + "memory(GiB)": 33.01, + "step": 4880, + "train_speed(iter/s)": 0.188842 + }, + { + "acc": 0.95776138, + "epoch": 1.618621603711067, + "grad_norm": 2.865593808145345, + "learning_rate": 9.973868725280732e-06, + "loss": 0.1929089, + "memory(GiB)": 33.01, + "step": 4885, + "train_speed(iter/s)": 0.188846 + }, + { + "acc": 0.94048004, + "epoch": 1.6202783300198806, + "grad_norm": 3.623357009895858, + "learning_rate": 9.973728696757416e-06, + "loss": 0.26675167, + "memory(GiB)": 33.01, + "step": 4890, + "train_speed(iter/s)": 0.188851 + }, + { + "acc": 0.95277262, + "epoch": 1.6219350563286945, + "grad_norm": 4.57397452194311, + "learning_rate": 9.973588295041944e-06, + "loss": 0.22158756, + "memory(GiB)": 33.01, + "step": 4895, + "train_speed(iter/s)": 0.188855 + }, + { + "acc": 0.9544117, + "epoch": 1.6235917826375084, + "grad_norm": 3.77915181580508, + "learning_rate": 9.973447520144849e-06, + "loss": 0.23694935, + "memory(GiB)": 33.01, + "step": 4900, + "train_speed(iter/s)": 0.188859 + }, + { + "acc": 0.95348244, + "epoch": 1.625248508946322, + "grad_norm": 3.0287842565991356, + "learning_rate": 9.973306372076694e-06, + "loss": 0.2032382, + "memory(GiB)": 33.01, + "step": 4905, + "train_speed(iter/s)": 0.188859 + }, + { + "acc": 0.94308414, + "epoch": 1.6269052352551359, + "grad_norm": 2.7091908324528453, + "learning_rate": 9.973164850848075e-06, + "loss": 0.25078931, + "memory(GiB)": 33.01, + "step": 4910, + "train_speed(iter/s)": 0.18886 + }, + { + "acc": 0.94056168, + "epoch": 1.6285619615639497, + "grad_norm": 4.888642428157676, + "learning_rate": 9.973022956469607e-06, + "loss": 0.26888967, + "memory(GiB)": 33.01, + "step": 4915, + "train_speed(iter/s)": 0.188857 + }, + { + "acc": 0.94573936, + "epoch": 1.6302186878727634, + "grad_norm": 3.0506240931691915, + "learning_rate": 9.972880688951938e-06, + "loss": 0.2353272, + "memory(GiB)": 33.01, + "step": 4920, + "train_speed(iter/s)": 0.188862 + }, + { + "acc": 0.95126534, + "epoch": 1.6318754141815772, + "grad_norm": 3.418758115114436, + "learning_rate": 9.972738048305746e-06, + "loss": 0.23389206, + "memory(GiB)": 33.01, + "step": 4925, + "train_speed(iter/s)": 0.188866 + }, + { + "acc": 0.9515564, + "epoch": 1.633532140490391, + "grad_norm": 3.95230001541203, + "learning_rate": 9.972595034541736e-06, + "loss": 0.22550275, + "memory(GiB)": 33.01, + "step": 4930, + "train_speed(iter/s)": 0.18887 + }, + { + "acc": 0.95114269, + "epoch": 1.6351888667992047, + "grad_norm": 2.7403865006896124, + "learning_rate": 9.972451647670637e-06, + "loss": 0.21844039, + "memory(GiB)": 33.01, + "step": 4935, + "train_speed(iter/s)": 0.188875 + }, + { + "acc": 0.94797792, + "epoch": 1.6368455931080186, + "grad_norm": 3.7388123912333575, + "learning_rate": 9.972307887703209e-06, + "loss": 0.24305737, + "memory(GiB)": 33.01, + "step": 4940, + "train_speed(iter/s)": 0.188879 + }, + { + "acc": 0.95092726, + "epoch": 1.6385023194168324, + "grad_norm": 3.118229751230935, + "learning_rate": 9.972163754650241e-06, + "loss": 0.22735724, + "memory(GiB)": 33.01, + "step": 4945, + "train_speed(iter/s)": 0.188884 + }, + { + "acc": 0.95121222, + "epoch": 1.640159045725646, + "grad_norm": 3.5823478676548786, + "learning_rate": 9.972019248522549e-06, + "loss": 0.21556726, + "memory(GiB)": 33.01, + "step": 4950, + "train_speed(iter/s)": 0.188888 + }, + { + "acc": 0.94801369, + "epoch": 1.64181577203446, + "grad_norm": 3.5514396644121535, + "learning_rate": 9.971874369330977e-06, + "loss": 0.24600191, + "memory(GiB)": 33.01, + "step": 4955, + "train_speed(iter/s)": 0.188893 + }, + { + "acc": 0.95595074, + "epoch": 1.6434724983432738, + "grad_norm": 2.804197111880328, + "learning_rate": 9.971729117086393e-06, + "loss": 0.21580234, + "memory(GiB)": 33.01, + "step": 4960, + "train_speed(iter/s)": 0.188898 + }, + { + "acc": 0.95798407, + "epoch": 1.6451292246520874, + "grad_norm": 4.845645575346084, + "learning_rate": 9.971583491799704e-06, + "loss": 0.20477767, + "memory(GiB)": 33.01, + "step": 4965, + "train_speed(iter/s)": 0.188902 + }, + { + "acc": 0.94212322, + "epoch": 1.6467859509609013, + "grad_norm": 4.103328640046485, + "learning_rate": 9.971437493481827e-06, + "loss": 0.23042445, + "memory(GiB)": 33.01, + "step": 4970, + "train_speed(iter/s)": 0.188906 + }, + { + "acc": 0.96105289, + "epoch": 1.6484426772697152, + "grad_norm": 2.7529876836341582, + "learning_rate": 9.97129112214373e-06, + "loss": 0.20002341, + "memory(GiB)": 33.01, + "step": 4975, + "train_speed(iter/s)": 0.18891 + }, + { + "acc": 0.94937401, + "epoch": 1.6500994035785288, + "grad_norm": 3.563674862874002, + "learning_rate": 9.971144377796387e-06, + "loss": 0.21998301, + "memory(GiB)": 33.01, + "step": 4980, + "train_speed(iter/s)": 0.188915 + }, + { + "acc": 0.9542902, + "epoch": 1.6517561298873427, + "grad_norm": 4.074266167183439, + "learning_rate": 9.970997260450817e-06, + "loss": 0.20854292, + "memory(GiB)": 33.01, + "step": 4985, + "train_speed(iter/s)": 0.188919 + }, + { + "acc": 0.96340275, + "epoch": 1.6534128561961565, + "grad_norm": 2.5160689305384083, + "learning_rate": 9.970849770118053e-06, + "loss": 0.18839086, + "memory(GiB)": 33.01, + "step": 4990, + "train_speed(iter/s)": 0.188923 + }, + { + "acc": 0.95943623, + "epoch": 1.6550695825049702, + "grad_norm": 3.4100784753401157, + "learning_rate": 9.970701906809169e-06, + "loss": 0.20573511, + "memory(GiB)": 33.01, + "step": 4995, + "train_speed(iter/s)": 0.188927 + }, + { + "acc": 0.95086899, + "epoch": 1.6567263088137838, + "grad_norm": 3.6252402944198403, + "learning_rate": 9.970553670535254e-06, + "loss": 0.23412619, + "memory(GiB)": 33.01, + "step": 5000, + "train_speed(iter/s)": 0.188931 + }, + { + "acc": 0.94626532, + "epoch": 1.6583830351225979, + "grad_norm": 3.5790919703147615, + "learning_rate": 9.970405061307439e-06, + "loss": 0.25025907, + "memory(GiB)": 33.01, + "step": 5005, + "train_speed(iter/s)": 0.188936 + }, + { + "acc": 0.94169321, + "epoch": 1.6600397614314115, + "grad_norm": 4.397053805409449, + "learning_rate": 9.97025607913687e-06, + "loss": 0.25981188, + "memory(GiB)": 33.01, + "step": 5010, + "train_speed(iter/s)": 0.18894 + }, + { + "acc": 0.95897369, + "epoch": 1.6616964877402252, + "grad_norm": 3.873323476185693, + "learning_rate": 9.97010672403473e-06, + "loss": 0.1975831, + "memory(GiB)": 33.01, + "step": 5015, + "train_speed(iter/s)": 0.188944 + }, + { + "acc": 0.94391537, + "epoch": 1.6633532140490392, + "grad_norm": 3.702807900229647, + "learning_rate": 9.969956996012225e-06, + "loss": 0.25931425, + "memory(GiB)": 33.01, + "step": 5020, + "train_speed(iter/s)": 0.188948 + }, + { + "acc": 0.9463459, + "epoch": 1.6650099403578529, + "grad_norm": 2.7456736700212057, + "learning_rate": 9.96980689508059e-06, + "loss": 0.22293758, + "memory(GiB)": 33.01, + "step": 5025, + "train_speed(iter/s)": 0.188953 + }, + { + "acc": 0.95056171, + "epoch": 1.6666666666666665, + "grad_norm": 4.5307337977824504, + "learning_rate": 9.96965642125109e-06, + "loss": 0.24241471, + "memory(GiB)": 33.01, + "step": 5030, + "train_speed(iter/s)": 0.188957 + }, + { + "acc": 0.95831909, + "epoch": 1.6683233929754806, + "grad_norm": 2.5561137922719492, + "learning_rate": 9.969505574535015e-06, + "loss": 0.21591077, + "memory(GiB)": 33.01, + "step": 5035, + "train_speed(iter/s)": 0.188962 + }, + { + "acc": 0.94991932, + "epoch": 1.6699801192842942, + "grad_norm": 3.988832293147868, + "learning_rate": 9.969354354943687e-06, + "loss": 0.25156703, + "memory(GiB)": 33.01, + "step": 5040, + "train_speed(iter/s)": 0.188966 + }, + { + "acc": 0.94121628, + "epoch": 1.6716368455931079, + "grad_norm": 3.322732560737147, + "learning_rate": 9.969202762488452e-06, + "loss": 0.27342329, + "memory(GiB)": 33.01, + "step": 5045, + "train_speed(iter/s)": 0.18897 + }, + { + "acc": 0.94726105, + "epoch": 1.673293571901922, + "grad_norm": 3.1076731504376363, + "learning_rate": 9.969050797180687e-06, + "loss": 0.228824, + "memory(GiB)": 33.01, + "step": 5050, + "train_speed(iter/s)": 0.188975 + }, + { + "acc": 0.9406065, + "epoch": 1.6749502982107356, + "grad_norm": 2.715517351499893, + "learning_rate": 9.968898459031793e-06, + "loss": 0.22380838, + "memory(GiB)": 33.01, + "step": 5055, + "train_speed(iter/s)": 0.188979 + }, + { + "acc": 0.95967426, + "epoch": 1.6766070245195492, + "grad_norm": 2.5665196685724334, + "learning_rate": 9.968745748053203e-06, + "loss": 0.2053267, + "memory(GiB)": 33.01, + "step": 5060, + "train_speed(iter/s)": 0.188984 + }, + { + "acc": 0.95684433, + "epoch": 1.678263750828363, + "grad_norm": 3.125254186816331, + "learning_rate": 9.968592664256378e-06, + "loss": 0.21209474, + "memory(GiB)": 33.01, + "step": 5065, + "train_speed(iter/s)": 0.188988 + }, + { + "acc": 0.95527973, + "epoch": 1.679920477137177, + "grad_norm": 4.22308090489758, + "learning_rate": 9.968439207652801e-06, + "loss": 0.21446476, + "memory(GiB)": 33.01, + "step": 5070, + "train_speed(iter/s)": 0.188992 + }, + { + "acc": 0.95776043, + "epoch": 1.6815772034459906, + "grad_norm": 3.2310268009096, + "learning_rate": 9.96828537825399e-06, + "loss": 0.20503042, + "memory(GiB)": 33.01, + "step": 5075, + "train_speed(iter/s)": 0.188996 + }, + { + "acc": 0.95915852, + "epoch": 1.6832339297548045, + "grad_norm": 3.8335144505767587, + "learning_rate": 9.96813117607149e-06, + "loss": 0.17421991, + "memory(GiB)": 33.01, + "step": 5080, + "train_speed(iter/s)": 0.189 + }, + { + "acc": 0.9563695, + "epoch": 1.6848906560636183, + "grad_norm": 2.8695194670299995, + "learning_rate": 9.967976601116873e-06, + "loss": 0.21266487, + "memory(GiB)": 33.01, + "step": 5085, + "train_speed(iter/s)": 0.189004 + }, + { + "acc": 0.96290131, + "epoch": 1.686547382372432, + "grad_norm": 4.501683510959438, + "learning_rate": 9.967821653401734e-06, + "loss": 0.16936184, + "memory(GiB)": 33.01, + "step": 5090, + "train_speed(iter/s)": 0.189008 + }, + { + "acc": 0.95391636, + "epoch": 1.6882041086812458, + "grad_norm": 4.418605474638244, + "learning_rate": 9.967666332937702e-06, + "loss": 0.23554609, + "memory(GiB)": 33.01, + "step": 5095, + "train_speed(iter/s)": 0.189012 + }, + { + "acc": 0.9614645, + "epoch": 1.6898608349900597, + "grad_norm": 3.942685151596654, + "learning_rate": 9.967510639736432e-06, + "loss": 0.23986785, + "memory(GiB)": 33.01, + "step": 5100, + "train_speed(iter/s)": 0.189016 + }, + { + "acc": 0.95797901, + "epoch": 1.6915175612988733, + "grad_norm": 4.000520180033358, + "learning_rate": 9.967354573809612e-06, + "loss": 0.20662632, + "memory(GiB)": 33.01, + "step": 5105, + "train_speed(iter/s)": 0.18902 + }, + { + "acc": 0.9563776, + "epoch": 1.6931742876076872, + "grad_norm": 3.002694099791044, + "learning_rate": 9.967198135168945e-06, + "loss": 0.21235328, + "memory(GiB)": 33.01, + "step": 5110, + "train_speed(iter/s)": 0.189025 + }, + { + "acc": 0.94639397, + "epoch": 1.694831013916501, + "grad_norm": 3.3130267697232045, + "learning_rate": 9.967041323826176e-06, + "loss": 0.25527148, + "memory(GiB)": 33.01, + "step": 5115, + "train_speed(iter/s)": 0.189029 + }, + { + "acc": 0.95592213, + "epoch": 1.6964877402253147, + "grad_norm": 3.336906608094463, + "learning_rate": 9.96688413979307e-06, + "loss": 0.20270851, + "memory(GiB)": 33.01, + "step": 5120, + "train_speed(iter/s)": 0.189033 + }, + { + "acc": 0.95745296, + "epoch": 1.6981444665341285, + "grad_norm": 4.44821439755114, + "learning_rate": 9.966726583081425e-06, + "loss": 0.20269413, + "memory(GiB)": 33.01, + "step": 5125, + "train_speed(iter/s)": 0.189037 + }, + { + "acc": 0.94841909, + "epoch": 1.6998011928429424, + "grad_norm": 3.580350601322894, + "learning_rate": 9.966568653703059e-06, + "loss": 0.2257447, + "memory(GiB)": 33.01, + "step": 5130, + "train_speed(iter/s)": 0.189041 + }, + { + "acc": 0.95071068, + "epoch": 1.701457919151756, + "grad_norm": 3.428909422067706, + "learning_rate": 9.966410351669827e-06, + "loss": 0.23408895, + "memory(GiB)": 33.01, + "step": 5135, + "train_speed(iter/s)": 0.189039 + }, + { + "acc": 0.964328, + "epoch": 1.70311464546057, + "grad_norm": 4.208990235126206, + "learning_rate": 9.96625167699361e-06, + "loss": 0.17427733, + "memory(GiB)": 33.01, + "step": 5140, + "train_speed(iter/s)": 0.189039 + }, + { + "acc": 0.95825062, + "epoch": 1.7047713717693838, + "grad_norm": 3.353043995134885, + "learning_rate": 9.966092629686306e-06, + "loss": 0.18491567, + "memory(GiB)": 33.01, + "step": 5145, + "train_speed(iter/s)": 0.189039 + }, + { + "acc": 0.95702209, + "epoch": 1.7064280980781974, + "grad_norm": 2.9130928584040086, + "learning_rate": 9.96593320975986e-06, + "loss": 0.19480469, + "memory(GiB)": 33.01, + "step": 5150, + "train_speed(iter/s)": 0.189044 + }, + { + "acc": 0.94524212, + "epoch": 1.7080848243870113, + "grad_norm": 3.237677337789052, + "learning_rate": 9.96577341722623e-06, + "loss": 0.21276062, + "memory(GiB)": 33.01, + "step": 5155, + "train_speed(iter/s)": 0.189048 + }, + { + "acc": 0.95643759, + "epoch": 1.7097415506958251, + "grad_norm": 3.0503011618136227, + "learning_rate": 9.965613252097407e-06, + "loss": 0.22177296, + "memory(GiB)": 33.01, + "step": 5160, + "train_speed(iter/s)": 0.189052 + }, + { + "acc": 0.95698528, + "epoch": 1.7113982770046388, + "grad_norm": 3.383540295767972, + "learning_rate": 9.965452714385411e-06, + "loss": 0.1991153, + "memory(GiB)": 33.01, + "step": 5165, + "train_speed(iter/s)": 0.189056 + }, + { + "acc": 0.96373568, + "epoch": 1.7130550033134526, + "grad_norm": 3.5158197989772724, + "learning_rate": 9.965291804102286e-06, + "loss": 0.20207872, + "memory(GiB)": 33.01, + "step": 5170, + "train_speed(iter/s)": 0.18906 + }, + { + "acc": 0.95304537, + "epoch": 1.7147117296222665, + "grad_norm": 3.345050508240883, + "learning_rate": 9.965130521260114e-06, + "loss": 0.22512956, + "memory(GiB)": 33.01, + "step": 5175, + "train_speed(iter/s)": 0.189064 + }, + { + "acc": 0.95467625, + "epoch": 1.7163684559310801, + "grad_norm": 3.7761870019739443, + "learning_rate": 9.964968865870988e-06, + "loss": 0.21822906, + "memory(GiB)": 33.01, + "step": 5180, + "train_speed(iter/s)": 0.189069 + }, + { + "acc": 0.95449238, + "epoch": 1.718025182239894, + "grad_norm": 6.041996880283651, + "learning_rate": 9.964806837947046e-06, + "loss": 0.21440597, + "memory(GiB)": 33.01, + "step": 5185, + "train_speed(iter/s)": 0.189073 + }, + { + "acc": 0.95477943, + "epoch": 1.7196819085487078, + "grad_norm": 3.384474313017865, + "learning_rate": 9.964644437500442e-06, + "loss": 0.19661164, + "memory(GiB)": 33.01, + "step": 5190, + "train_speed(iter/s)": 0.189077 + }, + { + "acc": 0.95581684, + "epoch": 1.7213386348575215, + "grad_norm": 2.7932712151654995, + "learning_rate": 9.964481664543367e-06, + "loss": 0.22425969, + "memory(GiB)": 33.01, + "step": 5195, + "train_speed(iter/s)": 0.189081 + }, + { + "acc": 0.95531349, + "epoch": 1.7229953611663353, + "grad_norm": 6.418297480498033, + "learning_rate": 9.964318519088031e-06, + "loss": 0.22553568, + "memory(GiB)": 33.01, + "step": 5200, + "train_speed(iter/s)": 0.189086 + }, + { + "acc": 0.95457287, + "epoch": 1.7246520874751492, + "grad_norm": 6.0263138539503665, + "learning_rate": 9.96415500114668e-06, + "loss": 0.20696769, + "memory(GiB)": 33.01, + "step": 5205, + "train_speed(iter/s)": 0.18909 + }, + { + "acc": 0.9520792, + "epoch": 1.7263088137839628, + "grad_norm": 3.8225435874716744, + "learning_rate": 9.96399111073158e-06, + "loss": 0.22758288, + "memory(GiB)": 33.01, + "step": 5210, + "train_speed(iter/s)": 0.189094 + }, + { + "acc": 0.96496429, + "epoch": 1.7279655400927767, + "grad_norm": 3.6492084490850134, + "learning_rate": 9.963826847855036e-06, + "loss": 0.19169132, + "memory(GiB)": 33.01, + "step": 5215, + "train_speed(iter/s)": 0.189098 + }, + { + "acc": 0.95372238, + "epoch": 1.7296222664015906, + "grad_norm": 5.643056842595738, + "learning_rate": 9.963662212529367e-06, + "loss": 0.22640302, + "memory(GiB)": 33.01, + "step": 5220, + "train_speed(iter/s)": 0.189102 + }, + { + "acc": 0.95633678, + "epoch": 1.7312789927104042, + "grad_norm": 6.03271100442074, + "learning_rate": 9.963497204766932e-06, + "loss": 0.21259847, + "memory(GiB)": 33.01, + "step": 5225, + "train_speed(iter/s)": 0.189107 + }, + { + "acc": 0.96206284, + "epoch": 1.732935719019218, + "grad_norm": 4.26004300569132, + "learning_rate": 9.963331824580114e-06, + "loss": 0.21050191, + "memory(GiB)": 33.01, + "step": 5230, + "train_speed(iter/s)": 0.189111 + }, + { + "acc": 0.957693, + "epoch": 1.734592445328032, + "grad_norm": 3.9193858249004085, + "learning_rate": 9.96316607198132e-06, + "loss": 0.20651617, + "memory(GiB)": 33.01, + "step": 5235, + "train_speed(iter/s)": 0.189115 + }, + { + "acc": 0.96739578, + "epoch": 1.7362491716368456, + "grad_norm": 2.4654973008604637, + "learning_rate": 9.962999946982989e-06, + "loss": 0.16179316, + "memory(GiB)": 33.01, + "step": 5240, + "train_speed(iter/s)": 0.18912 + }, + { + "acc": 0.96806068, + "epoch": 1.7379058979456594, + "grad_norm": 2.962843351851986, + "learning_rate": 9.962833449597588e-06, + "loss": 0.16479814, + "memory(GiB)": 33.01, + "step": 5245, + "train_speed(iter/s)": 0.189124 + }, + { + "acc": 0.95996628, + "epoch": 1.7395626242544733, + "grad_norm": 4.598241745371233, + "learning_rate": 9.962666579837611e-06, + "loss": 0.2062604, + "memory(GiB)": 33.01, + "step": 5250, + "train_speed(iter/s)": 0.189129 + }, + { + "acc": 0.96349878, + "epoch": 1.741219350563287, + "grad_norm": 3.3606613228712434, + "learning_rate": 9.962499337715579e-06, + "loss": 0.18981156, + "memory(GiB)": 33.01, + "step": 5255, + "train_speed(iter/s)": 0.189133 + }, + { + "acc": 0.94931068, + "epoch": 1.7428760768721008, + "grad_norm": 6.29495352456756, + "learning_rate": 9.962331723244042e-06, + "loss": 0.22791972, + "memory(GiB)": 33.01, + "step": 5260, + "train_speed(iter/s)": 0.189138 + }, + { + "acc": 0.95727224, + "epoch": 1.7445328031809146, + "grad_norm": 3.1276591507350804, + "learning_rate": 9.962163736435577e-06, + "loss": 0.21037591, + "memory(GiB)": 33.01, + "step": 5265, + "train_speed(iter/s)": 0.189142 + }, + { + "acc": 0.95165434, + "epoch": 1.7461895294897283, + "grad_norm": 3.2222867572456955, + "learning_rate": 9.961995377302792e-06, + "loss": 0.23067889, + "memory(GiB)": 33.01, + "step": 5270, + "train_speed(iter/s)": 0.189147 + }, + { + "acc": 0.9511673, + "epoch": 1.747846255798542, + "grad_norm": 2.621941939360323, + "learning_rate": 9.961826645858319e-06, + "loss": 0.21288648, + "memory(GiB)": 33.01, + "step": 5275, + "train_speed(iter/s)": 0.189151 + }, + { + "acc": 0.95167789, + "epoch": 1.749502982107356, + "grad_norm": 2.407501995466345, + "learning_rate": 9.961657542114822e-06, + "loss": 0.22208667, + "memory(GiB)": 33.01, + "step": 5280, + "train_speed(iter/s)": 0.189155 + }, + { + "acc": 0.94799423, + "epoch": 1.7511597084161696, + "grad_norm": 2.637185287430631, + "learning_rate": 9.961488066084985e-06, + "loss": 0.2063293, + "memory(GiB)": 33.01, + "step": 5285, + "train_speed(iter/s)": 0.18916 + }, + { + "acc": 0.96038609, + "epoch": 1.7528164347249833, + "grad_norm": 3.581585201701416, + "learning_rate": 9.961318217781532e-06, + "loss": 0.20009751, + "memory(GiB)": 33.01, + "step": 5290, + "train_speed(iter/s)": 0.189164 + }, + { + "acc": 0.95909309, + "epoch": 1.7544731610337974, + "grad_norm": 4.104222150516389, + "learning_rate": 9.961147997217207e-06, + "loss": 0.20937865, + "memory(GiB)": 33.01, + "step": 5295, + "train_speed(iter/s)": 0.189169 + }, + { + "acc": 0.96688213, + "epoch": 1.756129887342611, + "grad_norm": 2.178581712796887, + "learning_rate": 9.96097740440478e-06, + "loss": 0.17518711, + "memory(GiB)": 33.01, + "step": 5300, + "train_speed(iter/s)": 0.189173 + }, + { + "acc": 0.96675243, + "epoch": 1.7577866136514246, + "grad_norm": 5.061705003265265, + "learning_rate": 9.960806439357056e-06, + "loss": 0.18036057, + "memory(GiB)": 33.01, + "step": 5305, + "train_speed(iter/s)": 0.189177 + }, + { + "acc": 0.95813522, + "epoch": 1.7594433399602387, + "grad_norm": 3.2841932457130474, + "learning_rate": 9.96063510208686e-06, + "loss": 0.18793486, + "memory(GiB)": 33.01, + "step": 5310, + "train_speed(iter/s)": 0.189182 + }, + { + "acc": 0.96192608, + "epoch": 1.7611000662690524, + "grad_norm": 4.663571356022182, + "learning_rate": 9.960463392607053e-06, + "loss": 0.20335965, + "memory(GiB)": 33.01, + "step": 5315, + "train_speed(iter/s)": 0.189186 + }, + { + "acc": 0.96494179, + "epoch": 1.762756792577866, + "grad_norm": 3.5938288342932374, + "learning_rate": 9.960291310930522e-06, + "loss": 0.17716209, + "memory(GiB)": 33.01, + "step": 5320, + "train_speed(iter/s)": 0.189191 + }, + { + "acc": 0.95762358, + "epoch": 1.76441351888668, + "grad_norm": 5.279141194015146, + "learning_rate": 9.960118857070173e-06, + "loss": 0.21983705, + "memory(GiB)": 33.01, + "step": 5325, + "train_speed(iter/s)": 0.189195 + }, + { + "acc": 0.96453123, + "epoch": 1.7660702451954937, + "grad_norm": 3.180350743886554, + "learning_rate": 9.959946031038953e-06, + "loss": 0.16158001, + "memory(GiB)": 33.01, + "step": 5330, + "train_speed(iter/s)": 0.1892 + }, + { + "acc": 0.95384598, + "epoch": 1.7677269715043074, + "grad_norm": 4.137272848249057, + "learning_rate": 9.959772832849829e-06, + "loss": 0.19476873, + "memory(GiB)": 33.01, + "step": 5335, + "train_speed(iter/s)": 0.189204 + }, + { + "acc": 0.96212111, + "epoch": 1.7693836978131214, + "grad_norm": 3.018326915407665, + "learning_rate": 9.959599262515796e-06, + "loss": 0.17361283, + "memory(GiB)": 33.01, + "step": 5340, + "train_speed(iter/s)": 0.189208 + }, + { + "acc": 0.95687704, + "epoch": 1.771040424121935, + "grad_norm": 4.724545839141648, + "learning_rate": 9.959425320049883e-06, + "loss": 0.18612691, + "memory(GiB)": 33.01, + "step": 5345, + "train_speed(iter/s)": 0.189213 + }, + { + "acc": 0.95985813, + "epoch": 1.7726971504307487, + "grad_norm": 3.1952157664898784, + "learning_rate": 9.959251005465139e-06, + "loss": 0.17791543, + "memory(GiB)": 33.01, + "step": 5350, + "train_speed(iter/s)": 0.189217 + }, + { + "acc": 0.93917456, + "epoch": 1.7743538767395626, + "grad_norm": 7.908144443796971, + "learning_rate": 9.959076318774646e-06, + "loss": 0.29952276, + "memory(GiB)": 33.01, + "step": 5355, + "train_speed(iter/s)": 0.189222 + }, + { + "acc": 0.95899, + "epoch": 1.7760106030483764, + "grad_norm": 3.259262022956307, + "learning_rate": 9.958901259991513e-06, + "loss": 0.18029002, + "memory(GiB)": 33.01, + "step": 5360, + "train_speed(iter/s)": 0.189224 + }, + { + "acc": 0.95815773, + "epoch": 1.77766732935719, + "grad_norm": 2.3099222432152784, + "learning_rate": 9.958725829128877e-06, + "loss": 0.19149221, + "memory(GiB)": 33.01, + "step": 5365, + "train_speed(iter/s)": 0.189222 + }, + { + "acc": 0.96355696, + "epoch": 1.779324055666004, + "grad_norm": 3.452983714795854, + "learning_rate": 9.958550026199898e-06, + "loss": 0.19870839, + "memory(GiB)": 33.01, + "step": 5370, + "train_speed(iter/s)": 0.189221 + }, + { + "acc": 0.96189232, + "epoch": 1.7809807819748178, + "grad_norm": 1.6733743511786074, + "learning_rate": 9.958373851217772e-06, + "loss": 0.17930295, + "memory(GiB)": 33.01, + "step": 5375, + "train_speed(iter/s)": 0.189225 + }, + { + "acc": 0.97020321, + "epoch": 1.7826375082836314, + "grad_norm": 2.539753991816457, + "learning_rate": 9.95819730419572e-06, + "loss": 0.15973989, + "memory(GiB)": 33.01, + "step": 5380, + "train_speed(iter/s)": 0.18923 + }, + { + "acc": 0.96368361, + "epoch": 1.7842942345924453, + "grad_norm": 2.0468467926934912, + "learning_rate": 9.958020385146989e-06, + "loss": 0.19860497, + "memory(GiB)": 33.01, + "step": 5385, + "train_speed(iter/s)": 0.189234 + }, + { + "acc": 0.9708353, + "epoch": 1.7859509609012592, + "grad_norm": 2.8794638703337703, + "learning_rate": 9.957843094084853e-06, + "loss": 0.13811607, + "memory(GiB)": 33.01, + "step": 5390, + "train_speed(iter/s)": 0.189239 + }, + { + "acc": 0.96260929, + "epoch": 1.7876076872100728, + "grad_norm": 4.636465001333108, + "learning_rate": 9.95766543102262e-06, + "loss": 0.1859864, + "memory(GiB)": 33.01, + "step": 5395, + "train_speed(iter/s)": 0.189244 + }, + { + "acc": 0.95196228, + "epoch": 1.7892644135188867, + "grad_norm": 4.2647341048758936, + "learning_rate": 9.957487395973617e-06, + "loss": 0.21216271, + "memory(GiB)": 33.01, + "step": 5400, + "train_speed(iter/s)": 0.189248 + }, + { + "acc": 0.95609989, + "epoch": 1.7909211398277005, + "grad_norm": 4.495018159396596, + "learning_rate": 9.957308988951209e-06, + "loss": 0.21811047, + "memory(GiB)": 33.01, + "step": 5405, + "train_speed(iter/s)": 0.189253 + }, + { + "acc": 0.95574856, + "epoch": 1.7925778661365142, + "grad_norm": 3.054531740193686, + "learning_rate": 9.95713020996878e-06, + "loss": 0.20441015, + "memory(GiB)": 33.01, + "step": 5410, + "train_speed(iter/s)": 0.189257 + }, + { + "acc": 0.96194849, + "epoch": 1.794234592445328, + "grad_norm": 2.662537374343331, + "learning_rate": 9.956951059039747e-06, + "loss": 0.18904738, + "memory(GiB)": 33.01, + "step": 5415, + "train_speed(iter/s)": 0.189262 + }, + { + "acc": 0.96451588, + "epoch": 1.7958913187541419, + "grad_norm": 3.572118492814709, + "learning_rate": 9.956771536177553e-06, + "loss": 0.19641786, + "memory(GiB)": 33.01, + "step": 5420, + "train_speed(iter/s)": 0.189267 + }, + { + "acc": 0.96725283, + "epoch": 1.7975480450629555, + "grad_norm": 2.5921798885204437, + "learning_rate": 9.95659164139567e-06, + "loss": 0.16796941, + "memory(GiB)": 33.01, + "step": 5425, + "train_speed(iter/s)": 0.189272 + }, + { + "acc": 0.96207514, + "epoch": 1.7992047713717694, + "grad_norm": 3.7139254477161074, + "learning_rate": 9.956411374707596e-06, + "loss": 0.24679503, + "memory(GiB)": 33.01, + "step": 5430, + "train_speed(iter/s)": 0.189277 + }, + { + "acc": 0.95683517, + "epoch": 1.8008614976805832, + "grad_norm": 3.049983244753173, + "learning_rate": 9.956230736126861e-06, + "loss": 0.20307603, + "memory(GiB)": 33.01, + "step": 5435, + "train_speed(iter/s)": 0.189282 + }, + { + "acc": 0.97035847, + "epoch": 1.8025182239893969, + "grad_norm": 2.847732597823088, + "learning_rate": 9.956049725667017e-06, + "loss": 0.15548229, + "memory(GiB)": 33.01, + "step": 5440, + "train_speed(iter/s)": 0.189287 + }, + { + "acc": 0.96803818, + "epoch": 1.8041749502982107, + "grad_norm": 2.8413088988315227, + "learning_rate": 9.955868343341649e-06, + "loss": 0.16254368, + "memory(GiB)": 33.01, + "step": 5445, + "train_speed(iter/s)": 0.189291 + }, + { + "acc": 0.96036863, + "epoch": 1.8058316766070246, + "grad_norm": 4.110515646891542, + "learning_rate": 9.955686589164368e-06, + "loss": 0.19601402, + "memory(GiB)": 33.01, + "step": 5450, + "train_speed(iter/s)": 0.189296 + }, + { + "acc": 0.96154919, + "epoch": 1.8074884029158382, + "grad_norm": 2.6595176294751632, + "learning_rate": 9.955504463148812e-06, + "loss": 0.17074642, + "memory(GiB)": 33.01, + "step": 5455, + "train_speed(iter/s)": 0.189302 + }, + { + "acc": 0.96201286, + "epoch": 1.809145129224652, + "grad_norm": 3.7053677579403046, + "learning_rate": 9.955321965308649e-06, + "loss": 0.18650641, + "memory(GiB)": 33.01, + "step": 5460, + "train_speed(iter/s)": 0.189306 + }, + { + "acc": 0.96218853, + "epoch": 1.810801855533466, + "grad_norm": 3.859579308387411, + "learning_rate": 9.955139095657573e-06, + "loss": 0.17334511, + "memory(GiB)": 33.01, + "step": 5465, + "train_speed(iter/s)": 0.189311 + }, + { + "acc": 0.95393486, + "epoch": 1.8124585818422796, + "grad_norm": 5.1170078623053135, + "learning_rate": 9.954955854209306e-06, + "loss": 0.2070787, + "memory(GiB)": 33.01, + "step": 5470, + "train_speed(iter/s)": 0.189316 + }, + { + "acc": 0.9569067, + "epoch": 1.8141153081510935, + "grad_norm": 2.520467493843932, + "learning_rate": 9.954772240977597e-06, + "loss": 0.19718541, + "memory(GiB)": 33.01, + "step": 5475, + "train_speed(iter/s)": 0.189321 + }, + { + "acc": 0.96874485, + "epoch": 1.8157720344599073, + "grad_norm": 2.746935230366575, + "learning_rate": 9.954588255976228e-06, + "loss": 0.15265135, + "memory(GiB)": 33.01, + "step": 5480, + "train_speed(iter/s)": 0.189326 + }, + { + "acc": 0.95856409, + "epoch": 1.817428760768721, + "grad_norm": 3.689154169244318, + "learning_rate": 9.954403899219004e-06, + "loss": 0.18038337, + "memory(GiB)": 33.01, + "step": 5485, + "train_speed(iter/s)": 0.18933 + }, + { + "acc": 0.96436272, + "epoch": 1.8190854870775348, + "grad_norm": 3.482529267604087, + "learning_rate": 9.954219170719757e-06, + "loss": 0.17470418, + "memory(GiB)": 33.01, + "step": 5490, + "train_speed(iter/s)": 0.189336 + }, + { + "acc": 0.95511436, + "epoch": 1.8207422133863487, + "grad_norm": 3.5537877743981, + "learning_rate": 9.954034070492351e-06, + "loss": 0.20489969, + "memory(GiB)": 33.01, + "step": 5495, + "train_speed(iter/s)": 0.18934 + }, + { + "acc": 0.96482525, + "epoch": 1.8223989396951623, + "grad_norm": 2.419567994886498, + "learning_rate": 9.953848598550678e-06, + "loss": 0.18796244, + "memory(GiB)": 33.01, + "step": 5500, + "train_speed(iter/s)": 0.189345 + }, + { + "acc": 0.96336708, + "epoch": 1.8240556660039762, + "grad_norm": 3.3448878933897848, + "learning_rate": 9.953662754908652e-06, + "loss": 0.17106251, + "memory(GiB)": 33.01, + "step": 5505, + "train_speed(iter/s)": 0.18935 + }, + { + "acc": 0.96992645, + "epoch": 1.82571239231279, + "grad_norm": 2.7294900940660956, + "learning_rate": 9.95347653958022e-06, + "loss": 0.14668546, + "memory(GiB)": 33.01, + "step": 5510, + "train_speed(iter/s)": 0.189354 + }, + { + "acc": 0.95889301, + "epoch": 1.8273691186216037, + "grad_norm": 4.710714998865321, + "learning_rate": 9.953289952579356e-06, + "loss": 0.19427252, + "memory(GiB)": 33.01, + "step": 5515, + "train_speed(iter/s)": 0.189359 + }, + { + "acc": 0.95504227, + "epoch": 1.8290258449304175, + "grad_norm": 3.267686184607512, + "learning_rate": 9.953102993920061e-06, + "loss": 0.24130476, + "memory(GiB)": 33.01, + "step": 5520, + "train_speed(iter/s)": 0.189364 + }, + { + "acc": 0.95331078, + "epoch": 1.8306825712392314, + "grad_norm": 4.244190863046748, + "learning_rate": 9.952915663616367e-06, + "loss": 0.20748, + "memory(GiB)": 33.01, + "step": 5525, + "train_speed(iter/s)": 0.189369 + }, + { + "acc": 0.959585, + "epoch": 1.832339297548045, + "grad_norm": 5.624681869548543, + "learning_rate": 9.952727961682329e-06, + "loss": 0.21151447, + "memory(GiB)": 33.01, + "step": 5530, + "train_speed(iter/s)": 0.189374 + }, + { + "acc": 0.96110287, + "epoch": 1.833996023856859, + "grad_norm": 2.8042205318305466, + "learning_rate": 9.952539888132031e-06, + "loss": 0.18032423, + "memory(GiB)": 33.01, + "step": 5535, + "train_speed(iter/s)": 0.189378 + }, + { + "acc": 0.95601006, + "epoch": 1.8356527501656728, + "grad_norm": 3.27553391876023, + "learning_rate": 9.952351442979589e-06, + "loss": 0.22551713, + "memory(GiB)": 33.01, + "step": 5540, + "train_speed(iter/s)": 0.189383 + }, + { + "acc": 0.9670435, + "epoch": 1.8373094764744864, + "grad_norm": 3.4677278294009786, + "learning_rate": 9.95216262623914e-06, + "loss": 0.16397821, + "memory(GiB)": 33.01, + "step": 5545, + "train_speed(iter/s)": 0.189388 + }, + { + "acc": 0.9643259, + "epoch": 1.8389662027833003, + "grad_norm": 4.0715141841573566, + "learning_rate": 9.951973437924856e-06, + "loss": 0.18946757, + "memory(GiB)": 33.01, + "step": 5550, + "train_speed(iter/s)": 0.189393 + }, + { + "acc": 0.96782475, + "epoch": 1.8406229290921141, + "grad_norm": 3.620108181819642, + "learning_rate": 9.951783878050937e-06, + "loss": 0.15786865, + "memory(GiB)": 33.01, + "step": 5555, + "train_speed(iter/s)": 0.189397 + }, + { + "acc": 0.95842323, + "epoch": 1.8422796554009278, + "grad_norm": 4.018583108986213, + "learning_rate": 9.9515939466316e-06, + "loss": 0.17618065, + "memory(GiB)": 33.01, + "step": 5560, + "train_speed(iter/s)": 0.189402 + }, + { + "acc": 0.9770628, + "epoch": 1.8439363817097414, + "grad_norm": 3.4201700735753557, + "learning_rate": 9.9514036436811e-06, + "loss": 0.12254279, + "memory(GiB)": 33.01, + "step": 5565, + "train_speed(iter/s)": 0.189406 + }, + { + "acc": 0.97467213, + "epoch": 1.8455931080185555, + "grad_norm": 2.857966642793141, + "learning_rate": 9.951212969213721e-06, + "loss": 0.1302218, + "memory(GiB)": 33.01, + "step": 5570, + "train_speed(iter/s)": 0.189411 + }, + { + "acc": 0.97021542, + "epoch": 1.8472498343273691, + "grad_norm": 2.6506723907405125, + "learning_rate": 9.951021923243769e-06, + "loss": 0.16516323, + "memory(GiB)": 33.01, + "step": 5575, + "train_speed(iter/s)": 0.189415 + }, + { + "acc": 0.97295952, + "epoch": 1.8489065606361827, + "grad_norm": 4.35004800998398, + "learning_rate": 9.950830505785579e-06, + "loss": 0.16346823, + "memory(GiB)": 33.01, + "step": 5580, + "train_speed(iter/s)": 0.18942 + }, + { + "acc": 0.95436478, + "epoch": 1.8505632869449968, + "grad_norm": 5.718495853317044, + "learning_rate": 9.950638716853514e-06, + "loss": 0.19860299, + "memory(GiB)": 33.01, + "step": 5585, + "train_speed(iter/s)": 0.189422 + }, + { + "acc": 0.96391888, + "epoch": 1.8522200132538105, + "grad_norm": 2.732616807244597, + "learning_rate": 9.95044655646197e-06, + "loss": 0.16782058, + "memory(GiB)": 33.01, + "step": 5590, + "train_speed(iter/s)": 0.189425 + }, + { + "acc": 0.96666355, + "epoch": 1.853876739562624, + "grad_norm": 2.631055175162619, + "learning_rate": 9.950254024625365e-06, + "loss": 0.16699011, + "memory(GiB)": 33.01, + "step": 5595, + "train_speed(iter/s)": 0.189425 + }, + { + "acc": 0.9647212, + "epoch": 1.8555334658714382, + "grad_norm": 3.4960593654725756, + "learning_rate": 9.950061121358146e-06, + "loss": 0.17416546, + "memory(GiB)": 33.01, + "step": 5600, + "train_speed(iter/s)": 0.189426 + }, + { + "acc": 0.9627635, + "epoch": 1.8571901921802518, + "grad_norm": 3.4137551905665084, + "learning_rate": 9.949867846674787e-06, + "loss": 0.19350805, + "memory(GiB)": 33.01, + "step": 5605, + "train_speed(iter/s)": 0.18943 + }, + { + "acc": 0.96785946, + "epoch": 1.8588469184890655, + "grad_norm": 4.60939902857875, + "learning_rate": 9.949674200589795e-06, + "loss": 0.17039739, + "memory(GiB)": 33.01, + "step": 5610, + "train_speed(iter/s)": 0.189435 + }, + { + "acc": 0.96497545, + "epoch": 1.8605036447978796, + "grad_norm": 3.0991418877067023, + "learning_rate": 9.949480183117699e-06, + "loss": 0.16247368, + "memory(GiB)": 33.01, + "step": 5615, + "train_speed(iter/s)": 0.189439 + }, + { + "acc": 0.96137867, + "epoch": 1.8621603711066932, + "grad_norm": 3.78423727199372, + "learning_rate": 9.94928579427306e-06, + "loss": 0.17658565, + "memory(GiB)": 33.01, + "step": 5620, + "train_speed(iter/s)": 0.189444 + }, + { + "acc": 0.96758881, + "epoch": 1.8638170974155068, + "grad_norm": 2.786274762879028, + "learning_rate": 9.94909103407046e-06, + "loss": 0.1596454, + "memory(GiB)": 33.01, + "step": 5625, + "train_speed(iter/s)": 0.189448 + }, + { + "acc": 0.9570466, + "epoch": 1.8654738237243207, + "grad_norm": 2.4061052241565153, + "learning_rate": 9.948895902524519e-06, + "loss": 0.1833902, + "memory(GiB)": 33.01, + "step": 5630, + "train_speed(iter/s)": 0.189453 + }, + { + "acc": 0.9596221, + "epoch": 1.8671305500331346, + "grad_norm": 3.9073739215875074, + "learning_rate": 9.948700399649877e-06, + "loss": 0.18362517, + "memory(GiB)": 33.01, + "step": 5635, + "train_speed(iter/s)": 0.189457 + }, + { + "acc": 0.96014395, + "epoch": 1.8687872763419482, + "grad_norm": 4.525765170518365, + "learning_rate": 9.948504525461206e-06, + "loss": 0.18924073, + "memory(GiB)": 33.01, + "step": 5640, + "train_speed(iter/s)": 0.189462 + }, + { + "acc": 0.96675854, + "epoch": 1.870444002650762, + "grad_norm": 2.304845456733717, + "learning_rate": 9.948308279973206e-06, + "loss": 0.15461531, + "memory(GiB)": 33.01, + "step": 5645, + "train_speed(iter/s)": 0.189466 + }, + { + "acc": 0.96799221, + "epoch": 1.872100728959576, + "grad_norm": 3.1668688892631622, + "learning_rate": 9.9481116632006e-06, + "loss": 0.15957694, + "memory(GiB)": 33.01, + "step": 5650, + "train_speed(iter/s)": 0.189471 + }, + { + "acc": 0.96451693, + "epoch": 1.8737574552683895, + "grad_norm": 3.9604493761308337, + "learning_rate": 9.947914675158143e-06, + "loss": 0.19846363, + "memory(GiB)": 33.01, + "step": 5655, + "train_speed(iter/s)": 0.189475 + }, + { + "acc": 0.97062197, + "epoch": 1.8754141815772034, + "grad_norm": 4.31774606534621, + "learning_rate": 9.947717315860617e-06, + "loss": 0.15030167, + "memory(GiB)": 33.01, + "step": 5660, + "train_speed(iter/s)": 0.18948 + }, + { + "acc": 0.96638985, + "epoch": 1.8770709078860173, + "grad_norm": 4.0686747933870615, + "learning_rate": 9.947519585322832e-06, + "loss": 0.15593964, + "memory(GiB)": 33.01, + "step": 5665, + "train_speed(iter/s)": 0.189484 + }, + { + "acc": 0.96437912, + "epoch": 1.878727634194831, + "grad_norm": 3.9042899926660724, + "learning_rate": 9.947321483559628e-06, + "loss": 0.17043731, + "memory(GiB)": 33.01, + "step": 5670, + "train_speed(iter/s)": 0.189489 + }, + { + "acc": 0.95107632, + "epoch": 1.8803843605036448, + "grad_norm": 4.4675725921749825, + "learning_rate": 9.947123010585868e-06, + "loss": 0.21032701, + "memory(GiB)": 33.01, + "step": 5675, + "train_speed(iter/s)": 0.189493 + }, + { + "acc": 0.96552181, + "epoch": 1.8820410868124586, + "grad_norm": 2.2279364504453696, + "learning_rate": 9.946924166416445e-06, + "loss": 0.17136279, + "memory(GiB)": 33.01, + "step": 5680, + "train_speed(iter/s)": 0.189497 + }, + { + "acc": 0.96349983, + "epoch": 1.8836978131212723, + "grad_norm": 3.4830285073515905, + "learning_rate": 9.946724951066282e-06, + "loss": 0.19150252, + "memory(GiB)": 33.01, + "step": 5685, + "train_speed(iter/s)": 0.189502 + }, + { + "acc": 0.96433315, + "epoch": 1.8853545394300861, + "grad_norm": 2.812956752967826, + "learning_rate": 9.94652536455033e-06, + "loss": 0.17125444, + "memory(GiB)": 33.01, + "step": 5690, + "train_speed(iter/s)": 0.189506 + }, + { + "acc": 0.96799431, + "epoch": 1.8870112657389, + "grad_norm": 3.6819692867184632, + "learning_rate": 9.94632540688356e-06, + "loss": 0.17901212, + "memory(GiB)": 33.01, + "step": 5695, + "train_speed(iter/s)": 0.18951 + }, + { + "acc": 0.96423416, + "epoch": 1.8886679920477136, + "grad_norm": 3.2011576709080645, + "learning_rate": 9.946125078080982e-06, + "loss": 0.17200465, + "memory(GiB)": 33.01, + "step": 5700, + "train_speed(iter/s)": 0.189515 + }, + { + "acc": 0.96397667, + "epoch": 1.8903247183565275, + "grad_norm": 3.58136606188859, + "learning_rate": 9.945924378157627e-06, + "loss": 0.17308494, + "memory(GiB)": 33.01, + "step": 5705, + "train_speed(iter/s)": 0.189519 + }, + { + "acc": 0.97142429, + "epoch": 1.8919814446653413, + "grad_norm": 4.125904550145324, + "learning_rate": 9.945723307128558e-06, + "loss": 0.15433273, + "memory(GiB)": 33.01, + "step": 5710, + "train_speed(iter/s)": 0.189524 + }, + { + "acc": 0.96425629, + "epoch": 1.893638170974155, + "grad_norm": 3.413357208908564, + "learning_rate": 9.945521865008859e-06, + "loss": 0.16915461, + "memory(GiB)": 33.01, + "step": 5715, + "train_speed(iter/s)": 0.189528 + }, + { + "acc": 0.968606, + "epoch": 1.8952948972829688, + "grad_norm": 3.596202266452188, + "learning_rate": 9.94532005181365e-06, + "loss": 0.15530485, + "memory(GiB)": 33.01, + "step": 5720, + "train_speed(iter/s)": 0.189532 + }, + { + "acc": 0.96648588, + "epoch": 1.8969516235917827, + "grad_norm": 4.748709323979294, + "learning_rate": 9.945117867558072e-06, + "loss": 0.15111293, + "memory(GiB)": 33.01, + "step": 5725, + "train_speed(iter/s)": 0.189537 + }, + { + "acc": 0.96649303, + "epoch": 1.8986083499005963, + "grad_norm": 5.168717551643759, + "learning_rate": 9.9449153122573e-06, + "loss": 0.17624353, + "memory(GiB)": 33.01, + "step": 5730, + "train_speed(iter/s)": 0.189541 + }, + { + "acc": 0.9574091, + "epoch": 1.9002650762094102, + "grad_norm": 6.790787925776887, + "learning_rate": 9.94471238592653e-06, + "loss": 0.19798819, + "memory(GiB)": 33.01, + "step": 5735, + "train_speed(iter/s)": 0.189546 + }, + { + "acc": 0.96924324, + "epoch": 1.901921802518224, + "grad_norm": 3.632939503084288, + "learning_rate": 9.944509088580995e-06, + "loss": 0.13913025, + "memory(GiB)": 33.01, + "step": 5740, + "train_speed(iter/s)": 0.18955 + }, + { + "acc": 0.96660843, + "epoch": 1.9035785288270377, + "grad_norm": 2.941111655968318, + "learning_rate": 9.944305420235947e-06, + "loss": 0.16818404, + "memory(GiB)": 33.01, + "step": 5745, + "train_speed(iter/s)": 0.189555 + }, + { + "acc": 0.97014704, + "epoch": 1.9052352551358516, + "grad_norm": 3.759779853082322, + "learning_rate": 9.944101380906667e-06, + "loss": 0.1460492, + "memory(GiB)": 33.01, + "step": 5750, + "train_speed(iter/s)": 0.189559 + }, + { + "acc": 0.96233149, + "epoch": 1.9068919814446654, + "grad_norm": 2.9026664681000316, + "learning_rate": 9.943896970608472e-06, + "loss": 0.1657903, + "memory(GiB)": 33.01, + "step": 5755, + "train_speed(iter/s)": 0.189563 + }, + { + "acc": 0.96337318, + "epoch": 1.908548707753479, + "grad_norm": 2.7982066350813044, + "learning_rate": 9.943692189356696e-06, + "loss": 0.15194042, + "memory(GiB)": 33.01, + "step": 5760, + "train_speed(iter/s)": 0.189567 + }, + { + "acc": 0.96945772, + "epoch": 1.910205434062293, + "grad_norm": 3.8936747600975052, + "learning_rate": 9.943487037166708e-06, + "loss": 0.13850453, + "memory(GiB)": 33.01, + "step": 5765, + "train_speed(iter/s)": 0.189571 + }, + { + "acc": 0.95843439, + "epoch": 1.9118621603711068, + "grad_norm": 4.310320822323053, + "learning_rate": 9.943281514053902e-06, + "loss": 0.18423518, + "memory(GiB)": 33.01, + "step": 5770, + "train_speed(iter/s)": 0.189575 + }, + { + "acc": 0.96850796, + "epoch": 1.9135188866799204, + "grad_norm": 3.6817320343191, + "learning_rate": 9.943075620033702e-06, + "loss": 0.15167605, + "memory(GiB)": 33.01, + "step": 5775, + "train_speed(iter/s)": 0.18958 + }, + { + "acc": 0.9682394, + "epoch": 1.9151756129887343, + "grad_norm": 2.986590310742142, + "learning_rate": 9.942869355121556e-06, + "loss": 0.15726867, + "memory(GiB)": 33.01, + "step": 5780, + "train_speed(iter/s)": 0.189584 + }, + { + "acc": 0.96426868, + "epoch": 1.9168323392975481, + "grad_norm": 4.582984323255717, + "learning_rate": 9.942662719332945e-06, + "loss": 0.17554381, + "memory(GiB)": 33.01, + "step": 5785, + "train_speed(iter/s)": 0.189589 + }, + { + "acc": 0.96047783, + "epoch": 1.9184890656063618, + "grad_norm": 3.248707840523796, + "learning_rate": 9.942455712683373e-06, + "loss": 0.19423541, + "memory(GiB)": 33.01, + "step": 5790, + "train_speed(iter/s)": 0.189593 + }, + { + "acc": 0.96772156, + "epoch": 1.9201457919151756, + "grad_norm": 5.082307173847315, + "learning_rate": 9.942248335188373e-06, + "loss": 0.15759501, + "memory(GiB)": 33.01, + "step": 5795, + "train_speed(iter/s)": 0.189597 + }, + { + "acc": 0.965481, + "epoch": 1.9218025182239895, + "grad_norm": 3.4795951895953077, + "learning_rate": 9.94204058686351e-06, + "loss": 0.16091878, + "memory(GiB)": 33.01, + "step": 5800, + "train_speed(iter/s)": 0.189601 + }, + { + "acc": 0.96041965, + "epoch": 1.9234592445328031, + "grad_norm": 3.2883884783871697, + "learning_rate": 9.94183246772437e-06, + "loss": 0.18903304, + "memory(GiB)": 33.01, + "step": 5805, + "train_speed(iter/s)": 0.189606 + }, + { + "acc": 0.95328217, + "epoch": 1.925115970841617, + "grad_norm": 2.7988489264641285, + "learning_rate": 9.941623977786572e-06, + "loss": 0.21281953, + "memory(GiB)": 33.01, + "step": 5810, + "train_speed(iter/s)": 0.18961 + }, + { + "acc": 0.95457306, + "epoch": 1.9267726971504309, + "grad_norm": 3.389266641302172, + "learning_rate": 9.941415117065761e-06, + "loss": 0.17420642, + "memory(GiB)": 33.01, + "step": 5815, + "train_speed(iter/s)": 0.18961 + }, + { + "acc": 0.96181059, + "epoch": 1.9284294234592445, + "grad_norm": 4.1386874724940785, + "learning_rate": 9.941205885577608e-06, + "loss": 0.21812885, + "memory(GiB)": 33.01, + "step": 5820, + "train_speed(iter/s)": 0.189611 + }, + { + "acc": 0.96660843, + "epoch": 1.9300861497680584, + "grad_norm": 2.84832216281939, + "learning_rate": 9.940996283337817e-06, + "loss": 0.16445436, + "memory(GiB)": 33.01, + "step": 5825, + "train_speed(iter/s)": 0.18961 + }, + { + "acc": 0.96643486, + "epoch": 1.9317428760768722, + "grad_norm": 4.206598523108837, + "learning_rate": 9.940786310362114e-06, + "loss": 0.16456815, + "memory(GiB)": 33.01, + "step": 5830, + "train_speed(iter/s)": 0.189614 + }, + { + "acc": 0.96946182, + "epoch": 1.9333996023856859, + "grad_norm": 4.69027460844793, + "learning_rate": 9.940575966666259e-06, + "loss": 0.15309441, + "memory(GiB)": 33.01, + "step": 5835, + "train_speed(iter/s)": 0.189619 + }, + { + "acc": 0.97259493, + "epoch": 1.9350563286944995, + "grad_norm": 4.395042611092975, + "learning_rate": 9.940365252266032e-06, + "loss": 0.14127417, + "memory(GiB)": 33.01, + "step": 5840, + "train_speed(iter/s)": 0.189623 + }, + { + "acc": 0.96640015, + "epoch": 1.9367130550033136, + "grad_norm": 4.317823436957311, + "learning_rate": 9.940154167177245e-06, + "loss": 0.16580306, + "memory(GiB)": 33.01, + "step": 5845, + "train_speed(iter/s)": 0.189627 + }, + { + "acc": 0.95902576, + "epoch": 1.9383697813121272, + "grad_norm": 4.2769826619736735, + "learning_rate": 9.939942711415745e-06, + "loss": 0.18908883, + "memory(GiB)": 33.01, + "step": 5850, + "train_speed(iter/s)": 0.189631 + }, + { + "acc": 0.9634017, + "epoch": 1.9400265076209409, + "grad_norm": 3.0482091477460203, + "learning_rate": 9.93973088499739e-06, + "loss": 0.16514906, + "memory(GiB)": 33.01, + "step": 5855, + "train_speed(iter/s)": 0.189636 + }, + { + "acc": 0.95178919, + "epoch": 1.941683233929755, + "grad_norm": 3.310898769159179, + "learning_rate": 9.93951868793808e-06, + "loss": 0.21605191, + "memory(GiB)": 33.01, + "step": 5860, + "train_speed(iter/s)": 0.18964 + }, + { + "acc": 0.95677967, + "epoch": 1.9433399602385686, + "grad_norm": 2.0142825940634896, + "learning_rate": 9.93930612025374e-06, + "loss": 0.17813394, + "memory(GiB)": 33.01, + "step": 5865, + "train_speed(iter/s)": 0.189644 + }, + { + "acc": 0.96564751, + "epoch": 1.9449966865473822, + "grad_norm": 3.2560139068325338, + "learning_rate": 9.93909318196032e-06, + "loss": 0.16362079, + "memory(GiB)": 33.01, + "step": 5870, + "train_speed(iter/s)": 0.189648 + }, + { + "acc": 0.96476917, + "epoch": 1.9466534128561963, + "grad_norm": 2.8173555239865538, + "learning_rate": 9.938879873073794e-06, + "loss": 0.17951397, + "memory(GiB)": 33.01, + "step": 5875, + "train_speed(iter/s)": 0.189652 + }, + { + "acc": 0.96589565, + "epoch": 1.94831013916501, + "grad_norm": 3.174051439704815, + "learning_rate": 9.938666193610177e-06, + "loss": 0.14410775, + "memory(GiB)": 33.01, + "step": 5880, + "train_speed(iter/s)": 0.189657 + }, + { + "acc": 0.9731617, + "epoch": 1.9499668654738236, + "grad_norm": 3.8486874991213114, + "learning_rate": 9.938452143585498e-06, + "loss": 0.12638721, + "memory(GiB)": 33.01, + "step": 5885, + "train_speed(iter/s)": 0.189661 + }, + { + "acc": 0.97085266, + "epoch": 1.9516235917826377, + "grad_norm": 5.114970105369668, + "learning_rate": 9.938237723015821e-06, + "loss": 0.15634813, + "memory(GiB)": 33.01, + "step": 5890, + "train_speed(iter/s)": 0.189665 + }, + { + "acc": 0.97674122, + "epoch": 1.9532803180914513, + "grad_norm": 11.655025180971151, + "learning_rate": 9.938022931917237e-06, + "loss": 0.13883998, + "memory(GiB)": 33.01, + "step": 5895, + "train_speed(iter/s)": 0.189669 + }, + { + "acc": 0.96295128, + "epoch": 1.954937044400265, + "grad_norm": 4.368004065579172, + "learning_rate": 9.937807770305861e-06, + "loss": 0.18861229, + "memory(GiB)": 33.01, + "step": 5900, + "train_speed(iter/s)": 0.189674 + }, + { + "acc": 0.96569958, + "epoch": 1.9565937707090788, + "grad_norm": 3.3910342904685313, + "learning_rate": 9.937592238197843e-06, + "loss": 0.16494408, + "memory(GiB)": 33.01, + "step": 5905, + "train_speed(iter/s)": 0.189678 + }, + { + "acc": 0.95811682, + "epoch": 1.9582504970178927, + "grad_norm": 4.1514916422681045, + "learning_rate": 9.937376335609353e-06, + "loss": 0.18387959, + "memory(GiB)": 33.01, + "step": 5910, + "train_speed(iter/s)": 0.189682 + }, + { + "acc": 0.96629286, + "epoch": 1.9599072233267063, + "grad_norm": 2.8805711303155124, + "learning_rate": 9.937160062556596e-06, + "loss": 0.16447697, + "memory(GiB)": 33.01, + "step": 5915, + "train_speed(iter/s)": 0.189686 + }, + { + "acc": 0.96363659, + "epoch": 1.9615639496355202, + "grad_norm": 3.8383715650472063, + "learning_rate": 9.936943419055797e-06, + "loss": 0.16664251, + "memory(GiB)": 33.01, + "step": 5920, + "train_speed(iter/s)": 0.18969 + }, + { + "acc": 0.96479034, + "epoch": 1.963220675944334, + "grad_norm": 3.083885617741232, + "learning_rate": 9.936726405123215e-06, + "loss": 0.15399033, + "memory(GiB)": 33.01, + "step": 5925, + "train_speed(iter/s)": 0.189694 + }, + { + "acc": 0.96476812, + "epoch": 1.9648774022531477, + "grad_norm": 5.317195827197609, + "learning_rate": 9.936509020775135e-06, + "loss": 0.1703403, + "memory(GiB)": 33.01, + "step": 5930, + "train_speed(iter/s)": 0.189698 + }, + { + "acc": 0.96486731, + "epoch": 1.9665341285619615, + "grad_norm": 5.109553051311444, + "learning_rate": 9.936291266027869e-06, + "loss": 0.15160818, + "memory(GiB)": 33.01, + "step": 5935, + "train_speed(iter/s)": 0.189702 + }, + { + "acc": 0.9709569, + "epoch": 1.9681908548707754, + "grad_norm": 4.351579018153298, + "learning_rate": 9.936073140897757e-06, + "loss": 0.1614222, + "memory(GiB)": 33.01, + "step": 5940, + "train_speed(iter/s)": 0.189706 + }, + { + "acc": 0.96952105, + "epoch": 1.969847581179589, + "grad_norm": 3.496315852302909, + "learning_rate": 9.935854645401169e-06, + "loss": 0.16233735, + "memory(GiB)": 33.01, + "step": 5945, + "train_speed(iter/s)": 0.18971 + }, + { + "acc": 0.96859989, + "epoch": 1.9715043074884029, + "grad_norm": 4.402218749302247, + "learning_rate": 9.9356357795545e-06, + "loss": 0.16873567, + "memory(GiB)": 33.01, + "step": 5950, + "train_speed(iter/s)": 0.189714 + }, + { + "acc": 0.96829967, + "epoch": 1.9731610337972167, + "grad_norm": 5.259072901465839, + "learning_rate": 9.935416543374172e-06, + "loss": 0.19206886, + "memory(GiB)": 33.01, + "step": 5955, + "train_speed(iter/s)": 0.189718 + }, + { + "acc": 0.98156147, + "epoch": 1.9748177601060304, + "grad_norm": 2.9739096459866654, + "learning_rate": 9.93519693687664e-06, + "loss": 0.12224423, + "memory(GiB)": 33.01, + "step": 5960, + "train_speed(iter/s)": 0.189722 + }, + { + "acc": 0.96269608, + "epoch": 1.9764744864148442, + "grad_norm": 6.016239236015463, + "learning_rate": 9.93497696007838e-06, + "loss": 0.19697915, + "memory(GiB)": 33.01, + "step": 5965, + "train_speed(iter/s)": 0.189725 + }, + { + "acc": 0.97393789, + "epoch": 1.978131212723658, + "grad_norm": 6.86830193723295, + "learning_rate": 9.9347566129959e-06, + "loss": 0.1602066, + "memory(GiB)": 33.01, + "step": 5970, + "train_speed(iter/s)": 0.189729 + }, + { + "acc": 0.96911526, + "epoch": 1.9797879390324717, + "grad_norm": 4.370998411656359, + "learning_rate": 9.934535895645735e-06, + "loss": 0.16885473, + "memory(GiB)": 33.01, + "step": 5975, + "train_speed(iter/s)": 0.189733 + }, + { + "acc": 0.96276951, + "epoch": 1.9814446653412856, + "grad_norm": 5.0378776162092365, + "learning_rate": 9.93431480804445e-06, + "loss": 0.18467482, + "memory(GiB)": 33.01, + "step": 5980, + "train_speed(iter/s)": 0.189737 + }, + { + "acc": 0.9665226, + "epoch": 1.9831013916500995, + "grad_norm": 5.355865138419161, + "learning_rate": 9.934093350208633e-06, + "loss": 0.17767522, + "memory(GiB)": 33.01, + "step": 5985, + "train_speed(iter/s)": 0.189741 + }, + { + "acc": 0.97305756, + "epoch": 1.984758117958913, + "grad_norm": 4.467060175978817, + "learning_rate": 9.933871522154901e-06, + "loss": 0.14633491, + "memory(GiB)": 33.01, + "step": 5990, + "train_speed(iter/s)": 0.189745 + }, + { + "acc": 0.96583843, + "epoch": 1.986414844267727, + "grad_norm": 2.93855599647747, + "learning_rate": 9.933649323899903e-06, + "loss": 0.14597803, + "memory(GiB)": 33.01, + "step": 5995, + "train_speed(iter/s)": 0.189749 + }, + { + "acc": 0.97019806, + "epoch": 1.9880715705765408, + "grad_norm": 5.470550197007648, + "learning_rate": 9.933426755460312e-06, + "loss": 0.16220329, + "memory(GiB)": 33.01, + "step": 6000, + "train_speed(iter/s)": 0.189753 + }, + { + "acc": 0.98152065, + "epoch": 1.9897282968853545, + "grad_norm": 2.4884927713641116, + "learning_rate": 9.933203816852828e-06, + "loss": 0.10297159, + "memory(GiB)": 33.01, + "step": 6005, + "train_speed(iter/s)": 0.189757 + }, + { + "acc": 0.96945152, + "epoch": 1.9913850231941683, + "grad_norm": 2.2605004743369124, + "learning_rate": 9.932980508094182e-06, + "loss": 0.13022065, + "memory(GiB)": 33.01, + "step": 6010, + "train_speed(iter/s)": 0.189761 + }, + { + "acc": 0.96832008, + "epoch": 1.9930417495029822, + "grad_norm": 4.646889690280367, + "learning_rate": 9.93275682920113e-06, + "loss": 0.16072577, + "memory(GiB)": 33.01, + "step": 6015, + "train_speed(iter/s)": 0.189765 + }, + { + "acc": 0.96582203, + "epoch": 1.9946984758117958, + "grad_norm": 4.175424237969419, + "learning_rate": 9.932532780190458e-06, + "loss": 0.16094139, + "memory(GiB)": 33.01, + "step": 6020, + "train_speed(iter/s)": 0.189769 + }, + { + "acc": 0.96637249, + "epoch": 1.9963552021206097, + "grad_norm": 4.636797532893509, + "learning_rate": 9.932308361078978e-06, + "loss": 0.16655571, + "memory(GiB)": 33.01, + "step": 6025, + "train_speed(iter/s)": 0.189773 + }, + { + "acc": 0.9627594, + "epoch": 1.9980119284294235, + "grad_norm": 3.059624579163151, + "learning_rate": 9.932083571883532e-06, + "loss": 0.18198231, + "memory(GiB)": 33.01, + "step": 6030, + "train_speed(iter/s)": 0.189777 + }, + { + "acc": 0.96720581, + "epoch": 1.9996686547382372, + "grad_norm": 3.3566397258529785, + "learning_rate": 9.931858412620986e-06, + "loss": 0.15749495, + "memory(GiB)": 33.01, + "step": 6035, + "train_speed(iter/s)": 0.189782 + }, + { + "epoch": 2.0, + "eval_acc": 0.9713093375065206, + "eval_loss": 0.07300830632448196, + "eval_runtime": 508.1599, + "eval_samples_per_second": 10.558, + "eval_steps_per_second": 1.32, + "step": 6036 + }, + { + "acc": 0.41311496, + "epoch": 10.901713255184852, + "grad_norm": 46.142024967492716, + "learning_rate": 4.667995707060734e-06, + "loss": 18.62856674, + "memory(GiB)": 34.67, + "step": 6040, + "train_speed(iter/s)": 134.249727 + }, + { + "acc": 0.46514187, + "epoch": 10.910730387736699, + "grad_norm": 24.64683148399521, + "learning_rate": 4.66055183850045e-06, + "loss": 8.34107056, + "memory(GiB)": 34.67, + "step": 6045, + "train_speed(iter/s)": 81.484968 + }, + { + "acc": 0.5982141, + "epoch": 10.919747520288547, + "grad_norm": 23.668328342267483, + "learning_rate": 4.653108726992041e-06, + "loss": 4.0812561, + "memory(GiB)": 34.67, + "step": 6050, + "train_speed(iter/s)": 58.456234 + }, + { + "acc": 0.76831255, + "epoch": 10.928764652840396, + "grad_norm": 8.75274323714371, + "learning_rate": 4.645666389111036e-06, + "loss": 1.87636223, + "memory(GiB)": 34.67, + "step": 6055, + "train_speed(iter/s)": 45.601259 + }, + { + "acc": 0.77068529, + "epoch": 10.937781785392245, + "grad_norm": 7.157418515237572, + "learning_rate": 4.638224841431247e-06, + "loss": 1.38030567, + "memory(GiB)": 34.67, + "step": 6060, + "train_speed(iter/s)": 37.348973 + }, + { + "acc": 0.82335567, + "epoch": 10.946798917944093, + "grad_norm": 6.404999379717916, + "learning_rate": 4.630784100524721e-06, + "loss": 1.05846272, + "memory(GiB)": 34.67, + "step": 6065, + "train_speed(iter/s)": 31.659352 + }, + { + "acc": 0.81055689, + "epoch": 10.955816050495942, + "grad_norm": 5.668317115514912, + "learning_rate": 4.623344182961712e-06, + "loss": 1.10004044, + "memory(GiB)": 34.67, + "step": 6070, + "train_speed(iter/s)": 27.482731 + }, + { + "acc": 0.81895657, + "epoch": 10.96483318304779, + "grad_norm": 5.1716632093404105, + "learning_rate": 4.615905105310636e-06, + "loss": 1.02108898, + "memory(GiB)": 34.67, + "step": 6075, + "train_speed(iter/s)": 24.283442 + }, + { + "acc": 0.82773285, + "epoch": 10.97385031559964, + "grad_norm": 8.46597160197095, + "learning_rate": 4.608466884138046e-06, + "loss": 1.06346617, + "memory(GiB)": 34.67, + "step": 6080, + "train_speed(iter/s)": 21.750531 + }, + { + "acc": 0.85879459, + "epoch": 10.982867448151488, + "grad_norm": 7.763148888195122, + "learning_rate": 4.601029536008578e-06, + "loss": 0.83358173, + "memory(GiB)": 34.67, + "step": 6085, + "train_speed(iter/s)": 19.702423 + }, + { + "acc": 0.85695562, + "epoch": 10.991884580703337, + "grad_norm": 3.3846154403296946, + "learning_rate": 4.593593077484933e-06, + "loss": 0.80661335, + "memory(GiB)": 34.67, + "step": 6090, + "train_speed(iter/s)": 18.009462 + }, + { + "epoch": 10.999098286744815, + "eval_acc": 0.8568162926018288, + "eval_loss": 0.4145449101924896, + "eval_runtime": 49.8486, + "eval_samples_per_second": 9.368, + "eval_steps_per_second": 1.184, + "step": 6094 + }, + { + "acc": 0.95750618, + "epoch": 11.001803426510369, + "grad_norm": 4.465321488619032, + "learning_rate": 4.586157525127823e-06, + "loss": 0.84973698, + "memory(GiB)": 34.67, + "step": 6095, + "train_speed(iter/s)": 12.899899 + }, + { + "acc": 0.86048241, + "epoch": 11.010820559062218, + "grad_norm": 7.565049725009297, + "learning_rate": 4.57872289549595e-06, + "loss": 0.87631731, + "memory(GiB)": 34.67, + "step": 6100, + "train_speed(iter/s)": 12.156184 + }, + { + "acc": 0.86794395, + "epoch": 11.019837691614066, + "grad_norm": 4.406828723959708, + "learning_rate": 4.5712892051459525e-06, + "loss": 0.77457833, + "memory(GiB)": 34.67, + "step": 6105, + "train_speed(iter/s)": 11.495281 + }, + { + "acc": 0.85152607, + "epoch": 11.028854824165915, + "grad_norm": 4.05923810617219, + "learning_rate": 4.563856470632386e-06, + "loss": 0.86453714, + "memory(GiB)": 34.67, + "step": 6110, + "train_speed(iter/s)": 10.90187 + }, + { + "acc": 0.87472191, + "epoch": 11.037871956717764, + "grad_norm": 4.997244876473967, + "learning_rate": 4.556424708507669e-06, + "loss": 0.73219786, + "memory(GiB)": 34.67, + "step": 6115, + "train_speed(iter/s)": 10.366957 + }, + { + "acc": 0.86167145, + "epoch": 11.046889089269612, + "grad_norm": 4.27729182639188, + "learning_rate": 4.548993935322059e-06, + "loss": 0.80507736, + "memory(GiB)": 34.67, + "step": 6120, + "train_speed(iter/s)": 9.883493 + }, + { + "acc": 0.84846478, + "epoch": 11.055906221821461, + "grad_norm": 3.3910721001118187, + "learning_rate": 4.541564167623612e-06, + "loss": 0.8367732, + "memory(GiB)": 34.67, + "step": 6125, + "train_speed(iter/s)": 9.44275 + }, + { + "acc": 0.85515461, + "epoch": 11.06492335437331, + "grad_norm": 3.7405187429618736, + "learning_rate": 4.534135421958145e-06, + "loss": 0.81323128, + "memory(GiB)": 34.67, + "step": 6130, + "train_speed(iter/s)": 9.040436 + }, + { + "acc": 0.85793438, + "epoch": 11.073940486925158, + "grad_norm": 3.089469841978783, + "learning_rate": 4.526707714869195e-06, + "loss": 0.7354743, + "memory(GiB)": 34.67, + "step": 6135, + "train_speed(iter/s)": 8.672552 + }, + { + "acc": 0.88629551, + "epoch": 11.082957619477007, + "grad_norm": 7.313152698406621, + "learning_rate": 4.51928106289799e-06, + "loss": 0.61730213, + "memory(GiB)": 34.67, + "step": 6140, + "train_speed(iter/s)": 8.333162 + }, + { + "acc": 0.88074913, + "epoch": 11.091974752028856, + "grad_norm": 4.177627478142328, + "learning_rate": 4.511855482583406e-06, + "loss": 0.66737604, + "memory(GiB)": 34.67, + "step": 6145, + "train_speed(iter/s)": 8.020784 + }, + { + "acc": 0.87245197, + "epoch": 11.100991884580703, + "grad_norm": 8.500306095037253, + "learning_rate": 4.504430990461934e-06, + "loss": 0.73731365, + "memory(GiB)": 34.67, + "step": 6150, + "train_speed(iter/s)": 7.730995 + }, + { + "acc": 0.85932751, + "epoch": 11.110009017132551, + "grad_norm": 5.635892801921693, + "learning_rate": 4.49700760306764e-06, + "loss": 0.76138706, + "memory(GiB)": 34.67, + "step": 6155, + "train_speed(iter/s)": 7.46189 + }, + { + "acc": 0.86015644, + "epoch": 11.1190261496844, + "grad_norm": 5.5551702753791785, + "learning_rate": 4.489585336932132e-06, + "loss": 0.87525578, + "memory(GiB)": 34.67, + "step": 6160, + "train_speed(iter/s)": 7.21102 + }, + { + "acc": 0.85932522, + "epoch": 11.128043282236249, + "grad_norm": 4.989421229267309, + "learning_rate": 4.482164208584518e-06, + "loss": 0.73783965, + "memory(GiB)": 34.67, + "step": 6165, + "train_speed(iter/s)": 6.976535 + }, + { + "acc": 0.84846373, + "epoch": 11.137060414788097, + "grad_norm": 6.395565631558778, + "learning_rate": 4.474744234551378e-06, + "loss": 0.84804173, + "memory(GiB)": 34.67, + "step": 6170, + "train_speed(iter/s)": 6.757025 + }, + { + "acc": 0.86683969, + "epoch": 11.146077547339946, + "grad_norm": 4.076153166920833, + "learning_rate": 4.467325431356711e-06, + "loss": 0.7369915, + "memory(GiB)": 34.67, + "step": 6175, + "train_speed(iter/s)": 6.551458 + }, + { + "acc": 0.89684353, + "epoch": 11.155094679891794, + "grad_norm": 3.1160145079663875, + "learning_rate": 4.459907815521921e-06, + "loss": 0.56216383, + "memory(GiB)": 34.67, + "step": 6180, + "train_speed(iter/s)": 6.351374 + }, + { + "acc": 0.88666992, + "epoch": 11.164111812443643, + "grad_norm": 5.954337282924469, + "learning_rate": 4.452491403565758e-06, + "loss": 0.64935493, + "memory(GiB)": 34.67, + "step": 6185, + "train_speed(iter/s)": 6.167535 + }, + { + "acc": 0.89458447, + "epoch": 11.173128944995492, + "grad_norm": 6.2738775389672385, + "learning_rate": 4.445076212004297e-06, + "loss": 0.60745091, + "memory(GiB)": 34.67, + "step": 6190, + "train_speed(iter/s)": 5.996881 + }, + { + "acc": 0.8762886, + "epoch": 11.18214607754734, + "grad_norm": 3.591047627327035, + "learning_rate": 4.437662257350892e-06, + "loss": 0.70382156, + "memory(GiB)": 34.67, + "step": 6195, + "train_speed(iter/s)": 5.835641 + }, + { + "acc": 0.86773987, + "epoch": 11.19116321009919, + "grad_norm": 5.914609622702315, + "learning_rate": 4.430249556116143e-06, + "loss": 0.75445595, + "memory(GiB)": 34.67, + "step": 6200, + "train_speed(iter/s)": 5.682762 + }, + { + "acc": 0.86218958, + "epoch": 11.200180342651038, + "grad_norm": 4.554153845408479, + "learning_rate": 4.42283812480786e-06, + "loss": 0.80199242, + "memory(GiB)": 34.67, + "step": 6205, + "train_speed(iter/s)": 5.538199 + }, + { + "acc": 0.89075756, + "epoch": 11.209197475202885, + "grad_norm": 5.4323685656726175, + "learning_rate": 4.415427979931023e-06, + "loss": 0.61459293, + "memory(GiB)": 34.67, + "step": 6210, + "train_speed(iter/s)": 5.401332 + }, + { + "acc": 0.87350368, + "epoch": 11.218214607754733, + "grad_norm": 4.332978436405154, + "learning_rate": 4.40801913798775e-06, + "loss": 0.76399531, + "memory(GiB)": 34.67, + "step": 6215, + "train_speed(iter/s)": 5.270833 + }, + { + "acc": 0.8791748, + "epoch": 11.227231740306582, + "grad_norm": 3.2931449207102945, + "learning_rate": 4.400611615477254e-06, + "loss": 0.70324554, + "memory(GiB)": 34.67, + "step": 6220, + "train_speed(iter/s)": 5.146818 + }, + { + "acc": 0.90632401, + "epoch": 11.23624887285843, + "grad_norm": 3.786264171328009, + "learning_rate": 4.393205428895812e-06, + "loss": 0.54246979, + "memory(GiB)": 34.67, + "step": 6225, + "train_speed(iter/s)": 5.028875 + }, + { + "acc": 0.87950859, + "epoch": 11.24526600541028, + "grad_norm": 4.913441290267515, + "learning_rate": 4.385800594736724e-06, + "loss": 0.6850523, + "memory(GiB)": 34.67, + "step": 6230, + "train_speed(iter/s)": 4.916327 + }, + { + "acc": 0.89150009, + "epoch": 11.254283137962128, + "grad_norm": 6.077508029370644, + "learning_rate": 4.37839712949028e-06, + "loss": 0.63446875, + "memory(GiB)": 34.67, + "step": 6235, + "train_speed(iter/s)": 4.808878 + }, + { + "acc": 0.91192722, + "epoch": 11.263300270513977, + "grad_norm": 3.003061954894193, + "learning_rate": 4.370995049643723e-06, + "loss": 0.55151949, + "memory(GiB)": 34.67, + "step": 6240, + "train_speed(iter/s)": 4.706148 + }, + { + "acc": 0.87685108, + "epoch": 11.272317403065825, + "grad_norm": 4.869795131771389, + "learning_rate": 4.363594371681206e-06, + "loss": 0.71072888, + "memory(GiB)": 34.67, + "step": 6245, + "train_speed(iter/s)": 4.608017 + }, + { + "acc": 0.87757835, + "epoch": 11.281334535617674, + "grad_norm": 3.144467522698747, + "learning_rate": 4.356195112083764e-06, + "loss": 0.68076372, + "memory(GiB)": 34.67, + "step": 6250, + "train_speed(iter/s)": 4.513877 + }, + { + "acc": 0.87459698, + "epoch": 11.290351668169523, + "grad_norm": 3.4056717021233385, + "learning_rate": 4.348797287329274e-06, + "loss": 0.66040697, + "memory(GiB)": 34.67, + "step": 6255, + "train_speed(iter/s)": 4.423773 + }, + { + "acc": 0.89757004, + "epoch": 11.299368800721371, + "grad_norm": 3.0460819816788094, + "learning_rate": 4.341400913892411e-06, + "loss": 0.57552347, + "memory(GiB)": 34.67, + "step": 6260, + "train_speed(iter/s)": 4.337417 + }, + { + "acc": 0.87836895, + "epoch": 11.30838593327322, + "grad_norm": 3.362601582141929, + "learning_rate": 4.3340060082446275e-06, + "loss": 0.69174132, + "memory(GiB)": 34.67, + "step": 6265, + "train_speed(iter/s)": 4.254369 + }, + { + "acc": 0.89809933, + "epoch": 11.317403065825067, + "grad_norm": 3.5130130069687056, + "learning_rate": 4.326612586854099e-06, + "loss": 0.625875, + "memory(GiB)": 34.67, + "step": 6270, + "train_speed(iter/s)": 4.174463 + }, + { + "acc": 0.87767344, + "epoch": 11.326420198376915, + "grad_norm": 3.5824019523244695, + "learning_rate": 4.319220666185701e-06, + "loss": 0.68774424, + "memory(GiB)": 34.67, + "step": 6275, + "train_speed(iter/s)": 4.097551 + }, + { + "acc": 0.86709652, + "epoch": 11.335437330928764, + "grad_norm": 4.182412722715768, + "learning_rate": 4.311830262700964e-06, + "loss": 0.70857172, + "memory(GiB)": 34.67, + "step": 6280, + "train_speed(iter/s)": 4.023776 + }, + { + "acc": 0.88084459, + "epoch": 11.344454463480613, + "grad_norm": 5.101241830487778, + "learning_rate": 4.304441392858043e-06, + "loss": 0.64174843, + "memory(GiB)": 34.67, + "step": 6285, + "train_speed(iter/s)": 3.952641 + }, + { + "acc": 0.90726929, + "epoch": 11.353471596032461, + "grad_norm": 9.160278188310272, + "learning_rate": 4.2970540731116725e-06, + "loss": 0.51663237, + "memory(GiB)": 34.67, + "step": 6290, + "train_speed(iter/s)": 3.884066 + }, + { + "acc": 0.8933569, + "epoch": 11.36248872858431, + "grad_norm": 3.085642138490934, + "learning_rate": 4.28966831991314e-06, + "loss": 0.57848482, + "memory(GiB)": 34.67, + "step": 6295, + "train_speed(iter/s)": 3.817883 + }, + { + "acc": 0.87269354, + "epoch": 11.371505861136159, + "grad_norm": 5.681723562526792, + "learning_rate": 4.282284149710242e-06, + "loss": 0.64417229, + "memory(GiB)": 34.67, + "step": 6300, + "train_speed(iter/s)": 3.754185 + }, + { + "acc": 0.87589941, + "epoch": 11.380522993688007, + "grad_norm": 3.431820796043091, + "learning_rate": 4.27490157894725e-06, + "loss": 0.64798918, + "memory(GiB)": 34.67, + "step": 6305, + "train_speed(iter/s)": 3.692593 + }, + { + "acc": 0.89470549, + "epoch": 11.389540126239856, + "grad_norm": 3.4942441599389817, + "learning_rate": 4.267520624064872e-06, + "loss": 0.59482164, + "memory(GiB)": 34.67, + "step": 6310, + "train_speed(iter/s)": 3.633047 + }, + { + "acc": 0.91079884, + "epoch": 11.398557258791705, + "grad_norm": 3.3775386764524638, + "learning_rate": 4.260141301500224e-06, + "loss": 0.48047276, + "memory(GiB)": 34.67, + "step": 6315, + "train_speed(iter/s)": 3.575415 + }, + { + "acc": 0.88378391, + "epoch": 11.407574391343553, + "grad_norm": 4.1693750321585314, + "learning_rate": 4.2527636276867765e-06, + "loss": 0.62509356, + "memory(GiB)": 34.67, + "step": 6320, + "train_speed(iter/s)": 3.519721 + }, + { + "acc": 0.8680604, + "epoch": 11.416591523895402, + "grad_norm": 2.759915992870974, + "learning_rate": 4.245387619054336e-06, + "loss": 0.69436426, + "memory(GiB)": 34.67, + "step": 6325, + "train_speed(iter/s)": 3.465826 + }, + { + "acc": 0.86723642, + "epoch": 11.425608656447249, + "grad_norm": 4.066922897425721, + "learning_rate": 4.238013292028999e-06, + "loss": 0.6587019, + "memory(GiB)": 34.67, + "step": 6330, + "train_speed(iter/s)": 3.413544 + }, + { + "acc": 0.89130516, + "epoch": 11.434625788999098, + "grad_norm": 2.7832289877775747, + "learning_rate": 4.230640663033118e-06, + "loss": 0.62211418, + "memory(GiB)": 34.67, + "step": 6335, + "train_speed(iter/s)": 3.362958 + }, + { + "acc": 0.86635342, + "epoch": 11.443642921550946, + "grad_norm": 3.3555983359646153, + "learning_rate": 4.223269748485257e-06, + "loss": 0.72348833, + "memory(GiB)": 34.67, + "step": 6340, + "train_speed(iter/s)": 3.313855 + }, + { + "acc": 0.8849308, + "epoch": 11.452660054102795, + "grad_norm": 3.4129318034164076, + "learning_rate": 4.2159005648001764e-06, + "loss": 0.64281712, + "memory(GiB)": 34.67, + "step": 6345, + "train_speed(iter/s)": 3.266216 + }, + { + "acc": 0.88074913, + "epoch": 11.461677186654644, + "grad_norm": 3.194855967646834, + "learning_rate": 4.208533128388767e-06, + "loss": 0.62555776, + "memory(GiB)": 34.67, + "step": 6350, + "train_speed(iter/s)": 3.220086 + }, + { + "acc": 0.88982649, + "epoch": 11.470694319206492, + "grad_norm": 3.317925136227602, + "learning_rate": 4.201167455658038e-06, + "loss": 0.5781611, + "memory(GiB)": 34.67, + "step": 6355, + "train_speed(iter/s)": 3.175256 + }, + { + "acc": 0.88328705, + "epoch": 11.479711451758341, + "grad_norm": 3.483165660746629, + "learning_rate": 4.193803563011065e-06, + "loss": 0.68382282, + "memory(GiB)": 34.67, + "step": 6360, + "train_speed(iter/s)": 3.131704 + }, + { + "acc": 0.89734564, + "epoch": 11.48872858431019, + "grad_norm": 3.9351604916671716, + "learning_rate": 4.186441466846966e-06, + "loss": 0.53999434, + "memory(GiB)": 34.67, + "step": 6365, + "train_speed(iter/s)": 3.089461 + }, + { + "acc": 0.90035248, + "epoch": 11.497745716862038, + "grad_norm": 3.457828710313778, + "learning_rate": 4.17908118356085e-06, + "loss": 0.58038116, + "memory(GiB)": 34.67, + "step": 6370, + "train_speed(iter/s)": 3.048327 + }, + { + "acc": 0.90201874, + "epoch": 11.506762849413887, + "grad_norm": 4.529124402650824, + "learning_rate": 4.171722729543797e-06, + "loss": 0.50730953, + "memory(GiB)": 34.67, + "step": 6375, + "train_speed(iter/s)": 3.008356 + }, + { + "acc": 0.89611263, + "epoch": 11.515779981965736, + "grad_norm": 3.523044991804961, + "learning_rate": 4.164366121182807e-06, + "loss": 0.58241019, + "memory(GiB)": 34.67, + "step": 6380, + "train_speed(iter/s)": 2.969551 + }, + { + "acc": 0.90218925, + "epoch": 11.524797114517582, + "grad_norm": 2.8492217525856893, + "learning_rate": 4.157011374860774e-06, + "loss": 0.53789234, + "memory(GiB)": 34.67, + "step": 6385, + "train_speed(iter/s)": 2.931828 + }, + { + "acc": 0.89348812, + "epoch": 11.533814247069431, + "grad_norm": 3.2999194874628395, + "learning_rate": 4.149658506956444e-06, + "loss": 0.55231614, + "memory(GiB)": 34.67, + "step": 6390, + "train_speed(iter/s)": 2.895088 + }, + { + "acc": 0.88687525, + "epoch": 11.54283137962128, + "grad_norm": 5.519207580798505, + "learning_rate": 4.142307533844378e-06, + "loss": 0.5719964, + "memory(GiB)": 34.67, + "step": 6395, + "train_speed(iter/s)": 2.859315 + }, + { + "acc": 0.87307539, + "epoch": 11.551848512173128, + "grad_norm": 3.2697999679942833, + "learning_rate": 4.13495847189492e-06, + "loss": 0.63465419, + "memory(GiB)": 34.67, + "step": 6400, + "train_speed(iter/s)": 2.824461 + }, + { + "acc": 0.89108429, + "epoch": 11.560865644724977, + "grad_norm": 3.439456573595362, + "learning_rate": 4.1276113374741555e-06, + "loss": 0.60180283, + "memory(GiB)": 34.67, + "step": 6405, + "train_speed(iter/s)": 2.790087 + }, + { + "acc": 0.88042135, + "epoch": 11.569882777276826, + "grad_norm": 2.7209605662247496, + "learning_rate": 4.120266146943882e-06, + "loss": 0.65857315, + "memory(GiB)": 34.67, + "step": 6410, + "train_speed(iter/s)": 2.756114 + }, + { + "acc": 0.88512735, + "epoch": 11.578899909828674, + "grad_norm": 3.687439173536713, + "learning_rate": 4.112922916661561e-06, + "loss": 0.63806605, + "memory(GiB)": 34.67, + "step": 6415, + "train_speed(iter/s)": 2.723805 + }, + { + "acc": 0.89790583, + "epoch": 11.587917042380523, + "grad_norm": 5.283415110789133, + "learning_rate": 4.105581662980295e-06, + "loss": 0.5472147, + "memory(GiB)": 34.67, + "step": 6420, + "train_speed(iter/s)": 2.692311 + }, + { + "acc": 0.89327641, + "epoch": 11.596934174932372, + "grad_norm": 2.171938859098593, + "learning_rate": 4.098242402248779e-06, + "loss": 0.58158698, + "memory(GiB)": 34.67, + "step": 6425, + "train_speed(iter/s)": 2.661569 + }, + { + "acc": 0.88704071, + "epoch": 11.60595130748422, + "grad_norm": 3.773846594518904, + "learning_rate": 4.090905150811277e-06, + "loss": 0.56592655, + "memory(GiB)": 34.67, + "step": 6430, + "train_speed(iter/s)": 2.631583 + }, + { + "acc": 0.88291311, + "epoch": 11.614968440036069, + "grad_norm": 9.70034059919439, + "learning_rate": 4.083569925007571e-06, + "loss": 0.61799598, + "memory(GiB)": 34.67, + "step": 6435, + "train_speed(iter/s)": 2.602311 + }, + { + "acc": 0.8969924, + "epoch": 11.623985572587918, + "grad_norm": 2.46772646889123, + "learning_rate": 4.076236741172937e-06, + "loss": 0.57910976, + "memory(GiB)": 34.67, + "step": 6440, + "train_speed(iter/s)": 2.573708 + }, + { + "acc": 0.89131985, + "epoch": 11.633002705139766, + "grad_norm": 2.839504986424735, + "learning_rate": 4.068905615638101e-06, + "loss": 0.55641756, + "memory(GiB)": 34.67, + "step": 6445, + "train_speed(iter/s)": 2.54581 + }, + { + "acc": 0.89318829, + "epoch": 11.642019837691613, + "grad_norm": 3.738850204580844, + "learning_rate": 4.061576564729205e-06, + "loss": 0.58655119, + "memory(GiB)": 34.67, + "step": 6450, + "train_speed(iter/s)": 2.518519 + }, + { + "acc": 0.8902998, + "epoch": 11.651036970243462, + "grad_norm": 3.086625314999831, + "learning_rate": 4.0542496047677736e-06, + "loss": 0.56620655, + "memory(GiB)": 34.67, + "step": 6455, + "train_speed(iter/s)": 2.49192 + }, + { + "acc": 0.90637522, + "epoch": 11.66005410279531, + "grad_norm": 2.7876811806222728, + "learning_rate": 4.046924752070674e-06, + "loss": 0.52601414, + "memory(GiB)": 34.67, + "step": 6460, + "train_speed(iter/s)": 2.465954 + }, + { + "acc": 0.88894434, + "epoch": 11.66907123534716, + "grad_norm": 2.7121026127735903, + "learning_rate": 4.03960202295008e-06, + "loss": 0.58678231, + "memory(GiB)": 34.67, + "step": 6465, + "train_speed(iter/s)": 2.44045 + }, + { + "acc": 0.90352039, + "epoch": 11.678088367899008, + "grad_norm": 2.8581865030348097, + "learning_rate": 4.032281433713433e-06, + "loss": 0.5328721, + "memory(GiB)": 34.67, + "step": 6470, + "train_speed(iter/s)": 2.41557 + }, + { + "acc": 0.90520477, + "epoch": 11.687105500450857, + "grad_norm": 2.1320453957594507, + "learning_rate": 4.024963000663416e-06, + "loss": 0.53016138, + "memory(GiB)": 34.67, + "step": 6475, + "train_speed(iter/s)": 2.391213 + }, + { + "acc": 0.89854393, + "epoch": 11.696122633002705, + "grad_norm": 3.8378194348058896, + "learning_rate": 4.017646740097904e-06, + "loss": 0.62174253, + "memory(GiB)": 34.67, + "step": 6480, + "train_speed(iter/s)": 2.367355 + }, + { + "acc": 0.87098484, + "epoch": 11.705139765554554, + "grad_norm": 4.875289121619593, + "learning_rate": 4.010332668309936e-06, + "loss": 0.71115298, + "memory(GiB)": 34.67, + "step": 6485, + "train_speed(iter/s)": 2.343979 + }, + { + "acc": 0.88149166, + "epoch": 11.714156898106403, + "grad_norm": 3.612520836313393, + "learning_rate": 4.003020801587679e-06, + "loss": 0.60402894, + "memory(GiB)": 34.67, + "step": 6490, + "train_speed(iter/s)": 2.321111 + }, + { + "acc": 0.88029213, + "epoch": 11.723174030658251, + "grad_norm": 2.8555388407255604, + "learning_rate": 3.995711156214386e-06, + "loss": 0.63368893, + "memory(GiB)": 34.67, + "step": 6495, + "train_speed(iter/s)": 2.29874 + }, + { + "acc": 0.89712105, + "epoch": 11.7321911632101, + "grad_norm": 3.4431630187392335, + "learning_rate": 3.9884037484683654e-06, + "loss": 0.49287953, + "memory(GiB)": 34.67, + "step": 6500, + "train_speed(iter/s)": 2.276833 + }, + { + "acc": 0.89483719, + "epoch": 11.741208295761947, + "grad_norm": 3.116246226804721, + "learning_rate": 3.981098594622941e-06, + "loss": 0.57070303, + "memory(GiB)": 34.67, + "step": 6505, + "train_speed(iter/s)": 2.255368 + }, + { + "acc": 0.89828339, + "epoch": 11.750225428313795, + "grad_norm": 4.168551371962979, + "learning_rate": 3.973795710946418e-06, + "loss": 0.58798347, + "memory(GiB)": 34.67, + "step": 6510, + "train_speed(iter/s)": 2.234309 + }, + { + "acc": 0.8866642, + "epoch": 11.759242560865644, + "grad_norm": 5.510974999962866, + "learning_rate": 3.966495113702046e-06, + "loss": 0.60468707, + "memory(GiB)": 34.67, + "step": 6515, + "train_speed(iter/s)": 2.213712 + }, + { + "acc": 0.90433388, + "epoch": 11.768259693417493, + "grad_norm": 3.700239501504165, + "learning_rate": 3.959196819147983e-06, + "loss": 0.56499805, + "memory(GiB)": 34.67, + "step": 6520, + "train_speed(iter/s)": 2.193534 + }, + { + "acc": 0.89157143, + "epoch": 11.777276825969341, + "grad_norm": 2.9027667808878728, + "learning_rate": 3.951900843537258e-06, + "loss": 0.55809302, + "memory(GiB)": 34.67, + "step": 6525, + "train_speed(iter/s)": 2.173749 + }, + { + "acc": 0.88304062, + "epoch": 11.78629395852119, + "grad_norm": 2.7546405080588374, + "learning_rate": 3.944607203117736e-06, + "loss": 0.60958209, + "memory(GiB)": 34.67, + "step": 6530, + "train_speed(iter/s)": 2.154292 + }, + { + "acc": 0.89274206, + "epoch": 11.795311091073039, + "grad_norm": 2.9457983338903353, + "learning_rate": 3.937315914132086e-06, + "loss": 0.6001379, + "memory(GiB)": 34.67, + "step": 6535, + "train_speed(iter/s)": 2.13522 + }, + { + "acc": 0.88494377, + "epoch": 11.804328223624887, + "grad_norm": 3.08912343148247, + "learning_rate": 3.930026992817731e-06, + "loss": 0.62496905, + "memory(GiB)": 34.67, + "step": 6540, + "train_speed(iter/s)": 2.116564 + }, + { + "acc": 0.901404, + "epoch": 11.813345356176736, + "grad_norm": 2.7543128788149156, + "learning_rate": 3.922740455406831e-06, + "loss": 0.53852654, + "memory(GiB)": 34.67, + "step": 6545, + "train_speed(iter/s)": 2.09826 + }, + { + "acc": 0.87935314, + "epoch": 11.822362488728585, + "grad_norm": 3.011957805835299, + "learning_rate": 3.915456318126231e-06, + "loss": 0.64246588, + "memory(GiB)": 34.67, + "step": 6550, + "train_speed(iter/s)": 2.080237 + }, + { + "acc": 0.90037899, + "epoch": 11.831379621280433, + "grad_norm": 2.303245000726657, + "learning_rate": 3.908174597197433e-06, + "loss": 0.56113682, + "memory(GiB)": 34.67, + "step": 6555, + "train_speed(iter/s)": 2.062574 + }, + { + "acc": 0.89349146, + "epoch": 11.840396753832282, + "grad_norm": 2.946873472049538, + "learning_rate": 3.900895308836558e-06, + "loss": 0.58248949, + "memory(GiB)": 34.67, + "step": 6560, + "train_speed(iter/s)": 2.045216 + }, + { + "acc": 0.91550884, + "epoch": 11.84941388638413, + "grad_norm": 4.106085417976899, + "learning_rate": 3.893618469254311e-06, + "loss": 0.48115005, + "memory(GiB)": 34.67, + "step": 6565, + "train_speed(iter/s)": 2.02817 + }, + { + "acc": 0.89332514, + "epoch": 11.858431018935978, + "grad_norm": 4.238738103103749, + "learning_rate": 3.886344094655941e-06, + "loss": 0.56668701, + "memory(GiB)": 34.67, + "step": 6570, + "train_speed(iter/s)": 2.011458 + }, + { + "acc": 0.90490742, + "epoch": 11.867448151487826, + "grad_norm": 3.126016628257221, + "learning_rate": 3.879072201241209e-06, + "loss": 0.51700487, + "memory(GiB)": 34.67, + "step": 6575, + "train_speed(iter/s)": 1.995037 + }, + { + "acc": 0.89984188, + "epoch": 11.876465284039675, + "grad_norm": 1.841121350103198, + "learning_rate": 3.871802805204352e-06, + "loss": 0.53324027, + "memory(GiB)": 34.67, + "step": 6580, + "train_speed(iter/s)": 1.978906 + }, + { + "acc": 0.8948185, + "epoch": 11.885482416591524, + "grad_norm": 4.011181853907612, + "learning_rate": 3.86453592273404e-06, + "loss": 0.58295531, + "memory(GiB)": 34.67, + "step": 6585, + "train_speed(iter/s)": 1.963059 + }, + { + "acc": 0.90139599, + "epoch": 11.894499549143372, + "grad_norm": 5.673338290492078, + "learning_rate": 3.857271570013353e-06, + "loss": 0.55692811, + "memory(GiB)": 34.67, + "step": 6590, + "train_speed(iter/s)": 1.947502 + }, + { + "acc": 0.8901413, + "epoch": 11.903516681695221, + "grad_norm": 4.086904198177409, + "learning_rate": 3.8500097632197326e-06, + "loss": 0.57388477, + "memory(GiB)": 34.67, + "step": 6595, + "train_speed(iter/s)": 1.932209 + }, + { + "acc": 0.90845718, + "epoch": 11.91253381424707, + "grad_norm": 3.093457960696665, + "learning_rate": 3.842750518524949e-06, + "loss": 0.49866705, + "memory(GiB)": 34.67, + "step": 6600, + "train_speed(iter/s)": 1.917176 + }, + { + "acc": 0.89074831, + "epoch": 11.921550946798918, + "grad_norm": 4.125866495526902, + "learning_rate": 3.835493852095073e-06, + "loss": 0.55677662, + "memory(GiB)": 34.67, + "step": 6605, + "train_speed(iter/s)": 1.902415 + }, + { + "acc": 0.88955154, + "epoch": 11.930568079350767, + "grad_norm": 2.0918888964564273, + "learning_rate": 3.8282397800904275e-06, + "loss": 0.57829523, + "memory(GiB)": 34.67, + "step": 6610, + "train_speed(iter/s)": 1.887899 + }, + { + "acc": 0.88710747, + "epoch": 11.939585211902616, + "grad_norm": 3.713753590744293, + "learning_rate": 3.820988318665563e-06, + "loss": 0.597786, + "memory(GiB)": 34.67, + "step": 6615, + "train_speed(iter/s)": 1.873629 + }, + { + "acc": 0.89293966, + "epoch": 11.948602344454464, + "grad_norm": 3.392903276630068, + "learning_rate": 3.81373948396921e-06, + "loss": 0.54176407, + "memory(GiB)": 34.67, + "step": 6620, + "train_speed(iter/s)": 1.859592 + }, + { + "acc": 0.89268913, + "epoch": 11.957619477006311, + "grad_norm": 2.4845625647116334, + "learning_rate": 3.8064932921442553e-06, + "loss": 0.57533016, + "memory(GiB)": 34.67, + "step": 6625, + "train_speed(iter/s)": 1.845791 + }, + { + "acc": 0.89840698, + "epoch": 11.96663660955816, + "grad_norm": 2.7133497223785823, + "learning_rate": 3.799249759327697e-06, + "loss": 0.54027977, + "memory(GiB)": 34.67, + "step": 6630, + "train_speed(iter/s)": 1.831589 + }, + { + "acc": 0.88528519, + "epoch": 11.975653742110008, + "grad_norm": 4.907637284645984, + "learning_rate": 3.792008901650613e-06, + "loss": 0.6101069, + "memory(GiB)": 34.67, + "step": 6635, + "train_speed(iter/s)": 1.818238 + }, + { + "acc": 0.90314674, + "epoch": 11.984670874661857, + "grad_norm": 3.494447524122149, + "learning_rate": 3.7847707352381213e-06, + "loss": 0.50569868, + "memory(GiB)": 34.67, + "step": 6640, + "train_speed(iter/s)": 1.805082 + }, + { + "acc": 0.87980185, + "epoch": 11.993688007213706, + "grad_norm": 5.334889538473327, + "learning_rate": 3.777535276209352e-06, + "loss": 0.64969778, + "memory(GiB)": 34.67, + "step": 6645, + "train_speed(iter/s)": 1.792159 + }, + { + "epoch": 12.0, + "eval_acc": 0.8994181213632585, + "eval_loss": 0.2734980285167694, + "eval_runtime": 50.0068, + "eval_samples_per_second": 9.339, + "eval_steps_per_second": 1.18, + "step": 6649 + }, + { + "acc": 0.81350288, + "epoch": 12.001803426510369, + "grad_norm": 2.575214669644955, + "learning_rate": 3.770302540677399e-06, + "loss": 0.45334911, + "memory(GiB)": 34.67, + "step": 6650, + "train_speed(iter/s)": 1.733284 + }, + { + "acc": 0.90637493, + "epoch": 12.010820559062218, + "grad_norm": 3.4925685024652893, + "learning_rate": 3.763072544749297e-06, + "loss": 0.51629629, + "memory(GiB)": 34.67, + "step": 6655, + "train_speed(iter/s)": 1.721439 + }, + { + "acc": 0.92434101, + "epoch": 12.019837691614066, + "grad_norm": 2.039044261617016, + "learning_rate": 3.7558453045259762e-06, + "loss": 0.42532778, + "memory(GiB)": 34.67, + "step": 6660, + "train_speed(iter/s)": 1.709787 + }, + { + "acc": 0.89824448, + "epoch": 12.028854824165915, + "grad_norm": 2.1155265660242306, + "learning_rate": 3.74862083610223e-06, + "loss": 0.54138293, + "memory(GiB)": 34.67, + "step": 6665, + "train_speed(iter/s)": 1.698271 + }, + { + "acc": 0.91347828, + "epoch": 12.037871956717764, + "grad_norm": 3.418855024878286, + "learning_rate": 3.74139915556668e-06, + "loss": 0.48412566, + "memory(GiB)": 34.67, + "step": 6670, + "train_speed(iter/s)": 1.68692 + }, + { + "acc": 0.89754686, + "epoch": 12.046889089269612, + "grad_norm": 3.3779581181312692, + "learning_rate": 3.7341802790017427e-06, + "loss": 0.5130682, + "memory(GiB)": 34.67, + "step": 6675, + "train_speed(iter/s)": 1.675744 + }, + { + "acc": 0.91587677, + "epoch": 12.055906221821461, + "grad_norm": 5.789073567685528, + "learning_rate": 3.7269642224835843e-06, + "loss": 0.44497404, + "memory(GiB)": 34.67, + "step": 6680, + "train_speed(iter/s)": 1.664737 + }, + { + "acc": 0.89080715, + "epoch": 12.06492335437331, + "grad_norm": 3.831420518156388, + "learning_rate": 3.719751002082094e-06, + "loss": 0.55942383, + "memory(GiB)": 34.67, + "step": 6685, + "train_speed(iter/s)": 1.65391 + }, + { + "acc": 0.91208267, + "epoch": 12.073940486925158, + "grad_norm": 5.172936670113741, + "learning_rate": 3.7125406338608438e-06, + "loss": 0.44970398, + "memory(GiB)": 34.67, + "step": 6690, + "train_speed(iter/s)": 1.643196 + }, + { + "acc": 0.92443676, + "epoch": 12.082957619477007, + "grad_norm": 2.8504321431159267, + "learning_rate": 3.7053331338770557e-06, + "loss": 0.40664091, + "memory(GiB)": 34.67, + "step": 6695, + "train_speed(iter/s)": 1.632677 + }, + { + "acc": 0.91076431, + "epoch": 12.091974752028856, + "grad_norm": 3.604219094120456, + "learning_rate": 3.698128518181563e-06, + "loss": 0.45517664, + "memory(GiB)": 34.67, + "step": 6700, + "train_speed(iter/s)": 1.622263 + }, + { + "acc": 0.89761791, + "epoch": 12.100991884580703, + "grad_norm": 3.6072006583559166, + "learning_rate": 3.6909268028187756e-06, + "loss": 0.51308622, + "memory(GiB)": 34.67, + "step": 6705, + "train_speed(iter/s)": 1.612019 + }, + { + "acc": 0.90031252, + "epoch": 12.110009017132551, + "grad_norm": 3.28548310243231, + "learning_rate": 3.6837280038266453e-06, + "loss": 0.52073536, + "memory(GiB)": 34.67, + "step": 6710, + "train_speed(iter/s)": 1.601921 + }, + { + "acc": 0.88845291, + "epoch": 12.1190261496844, + "grad_norm": 2.7131167802900142, + "learning_rate": 3.6765321372366295e-06, + "loss": 0.58866653, + "memory(GiB)": 34.67, + "step": 6715, + "train_speed(iter/s)": 1.591964 + }, + { + "acc": 0.89709702, + "epoch": 12.128043282236249, + "grad_norm": 3.324104635839056, + "learning_rate": 3.669339219073653e-06, + "loss": 0.5729095, + "memory(GiB)": 34.67, + "step": 6720, + "train_speed(iter/s)": 1.582143 + }, + { + "acc": 0.90601044, + "epoch": 12.137060414788097, + "grad_norm": 3.9513577185198887, + "learning_rate": 3.6621492653560786e-06, + "loss": 0.51798992, + "memory(GiB)": 34.67, + "step": 6725, + "train_speed(iter/s)": 1.572461 + }, + { + "acc": 0.90220985, + "epoch": 12.146077547339946, + "grad_norm": 2.743185886658063, + "learning_rate": 3.654962292095663e-06, + "loss": 0.53028731, + "memory(GiB)": 34.67, + "step": 6730, + "train_speed(iter/s)": 1.562919 + }, + { + "acc": 0.90385151, + "epoch": 12.155094679891794, + "grad_norm": 3.8011841848228323, + "learning_rate": 3.647778315297528e-06, + "loss": 0.49340973, + "memory(GiB)": 34.67, + "step": 6735, + "train_speed(iter/s)": 1.553487 + }, + { + "acc": 0.89563923, + "epoch": 12.164111812443643, + "grad_norm": 4.042298265702815, + "learning_rate": 3.6405973509601235e-06, + "loss": 0.57576737, + "memory(GiB)": 34.67, + "step": 6740, + "train_speed(iter/s)": 1.544179 + }, + { + "acc": 0.90624695, + "epoch": 12.173128944995492, + "grad_norm": 2.2936041552366215, + "learning_rate": 3.633419415075186e-06, + "loss": 0.52801132, + "memory(GiB)": 34.67, + "step": 6745, + "train_speed(iter/s)": 1.534999 + }, + { + "acc": 0.89541893, + "epoch": 12.18214607754734, + "grad_norm": 2.0906405577820473, + "learning_rate": 3.6262445236277153e-06, + "loss": 0.55127258, + "memory(GiB)": 34.67, + "step": 6750, + "train_speed(iter/s)": 1.525961 + }, + { + "acc": 0.91594315, + "epoch": 12.19116321009919, + "grad_norm": 2.1409635042593913, + "learning_rate": 3.6190726925959222e-06, + "loss": 0.43182116, + "memory(GiB)": 34.67, + "step": 6755, + "train_speed(iter/s)": 1.517033 + }, + { + "acc": 0.90597992, + "epoch": 12.200180342651038, + "grad_norm": 2.926711327961975, + "learning_rate": 3.6119039379512115e-06, + "loss": 0.47865562, + "memory(GiB)": 34.67, + "step": 6760, + "train_speed(iter/s)": 1.508237 + }, + { + "acc": 0.90510683, + "epoch": 12.209197475202885, + "grad_norm": 2.604994122390104, + "learning_rate": 3.6047382756581285e-06, + "loss": 0.53902903, + "memory(GiB)": 34.67, + "step": 6765, + "train_speed(iter/s)": 1.499534 + }, + { + "acc": 0.91268291, + "epoch": 12.218214607754733, + "grad_norm": 1.9489388242574992, + "learning_rate": 3.5975757216743377e-06, + "loss": 0.42435141, + "memory(GiB)": 34.67, + "step": 6770, + "train_speed(iter/s)": 1.490931 + }, + { + "acc": 0.89771376, + "epoch": 12.227231740306582, + "grad_norm": 2.997573524241079, + "learning_rate": 3.5904162919505788e-06, + "loss": 0.57115669, + "memory(GiB)": 34.67, + "step": 6775, + "train_speed(iter/s)": 1.482461 + }, + { + "acc": 0.92218046, + "epoch": 12.23624887285843, + "grad_norm": 2.4411178735663697, + "learning_rate": 3.5832600024306345e-06, + "loss": 0.41223426, + "memory(GiB)": 34.67, + "step": 6780, + "train_speed(iter/s)": 1.474083 + }, + { + "acc": 0.88631496, + "epoch": 12.24526600541028, + "grad_norm": 4.293779878220328, + "learning_rate": 3.5761068690512978e-06, + "loss": 0.61934638, + "memory(GiB)": 34.67, + "step": 6785, + "train_speed(iter/s)": 1.465822 + }, + { + "acc": 0.90367622, + "epoch": 12.254283137962128, + "grad_norm": 2.0034139313461417, + "learning_rate": 3.5689569077423262e-06, + "loss": 0.51608496, + "memory(GiB)": 34.67, + "step": 6790, + "train_speed(iter/s)": 1.457684 + }, + { + "acc": 0.9154211, + "epoch": 12.263300270513977, + "grad_norm": 3.3568476733945207, + "learning_rate": 3.5618101344264176e-06, + "loss": 0.4631865, + "memory(GiB)": 34.67, + "step": 6795, + "train_speed(iter/s)": 1.449617 + }, + { + "acc": 0.90154781, + "epoch": 12.272317403065825, + "grad_norm": 2.1888321459224094, + "learning_rate": 3.5546665650191707e-06, + "loss": 0.51787224, + "memory(GiB)": 34.67, + "step": 6800, + "train_speed(iter/s)": 1.441671 + }, + { + "acc": 0.90700722, + "epoch": 12.281334535617674, + "grad_norm": 2.292007057868882, + "learning_rate": 3.5475262154290474e-06, + "loss": 0.45840693, + "memory(GiB)": 34.67, + "step": 6805, + "train_speed(iter/s)": 1.433802 + }, + { + "acc": 0.89161024, + "epoch": 12.290351668169523, + "grad_norm": 2.8701291268559577, + "learning_rate": 3.5403891015573407e-06, + "loss": 0.56402502, + "memory(GiB)": 34.67, + "step": 6810, + "train_speed(iter/s)": 1.426029 + }, + { + "acc": 0.90689182, + "epoch": 12.299368800721371, + "grad_norm": 3.4158428604143443, + "learning_rate": 3.533255239298135e-06, + "loss": 0.48761907, + "memory(GiB)": 34.67, + "step": 6815, + "train_speed(iter/s)": 1.418349 + }, + { + "acc": 0.88538704, + "epoch": 12.30838593327322, + "grad_norm": 2.744623166849405, + "learning_rate": 3.5261246445382768e-06, + "loss": 0.59895587, + "memory(GiB)": 34.67, + "step": 6820, + "train_speed(iter/s)": 1.410775 + }, + { + "acc": 0.90757618, + "epoch": 12.317403065825067, + "grad_norm": 2.348826790027136, + "learning_rate": 3.5189973331573347e-06, + "loss": 0.49169731, + "memory(GiB)": 34.67, + "step": 6825, + "train_speed(iter/s)": 1.403295 + }, + { + "acc": 0.90024757, + "epoch": 12.326420198376915, + "grad_norm": 3.4637134352625245, + "learning_rate": 3.5118733210275653e-06, + "loss": 0.53719163, + "memory(GiB)": 34.67, + "step": 6830, + "train_speed(iter/s)": 1.395908 + }, + { + "acc": 0.90511379, + "epoch": 12.335437330928764, + "grad_norm": 3.1429330020547877, + "learning_rate": 3.5047526240138756e-06, + "loss": 0.48838396, + "memory(GiB)": 34.67, + "step": 6835, + "train_speed(iter/s)": 1.388601 + }, + { + "acc": 0.89170218, + "epoch": 12.344454463480613, + "grad_norm": 2.77515228069174, + "learning_rate": 3.4976352579737966e-06, + "loss": 0.56038818, + "memory(GiB)": 34.67, + "step": 6840, + "train_speed(iter/s)": 1.381386 + }, + { + "acc": 0.88870592, + "epoch": 12.353471596032461, + "grad_norm": 2.612572066007513, + "learning_rate": 3.490521238757432e-06, + "loss": 0.58700676, + "memory(GiB)": 34.67, + "step": 6845, + "train_speed(iter/s)": 1.374257 + }, + { + "acc": 0.89043579, + "epoch": 12.36248872858431, + "grad_norm": 2.0526368361926117, + "learning_rate": 3.483410582207442e-06, + "loss": 0.56371317, + "memory(GiB)": 34.67, + "step": 6850, + "train_speed(iter/s)": 1.366813 + }, + { + "acc": 0.9151659, + "epoch": 12.371505861136159, + "grad_norm": 2.4417612779693547, + "learning_rate": 3.4763033041589896e-06, + "loss": 0.43916836, + "memory(GiB)": 34.67, + "step": 6855, + "train_speed(iter/s)": 1.359454 + }, + { + "acc": 0.91412525, + "epoch": 12.380522993688007, + "grad_norm": 3.774629777758544, + "learning_rate": 3.4691994204397204e-06, + "loss": 0.45177279, + "memory(GiB)": 34.67, + "step": 6860, + "train_speed(iter/s)": 1.352595 + }, + { + "acc": 0.90705338, + "epoch": 12.389540126239856, + "grad_norm": 3.0208576391297237, + "learning_rate": 3.4620989468697165e-06, + "loss": 0.49362307, + "memory(GiB)": 34.67, + "step": 6865, + "train_speed(iter/s)": 1.345799 + }, + { + "acc": 0.92138443, + "epoch": 12.398557258791705, + "grad_norm": 2.6168496358492157, + "learning_rate": 3.455001899261469e-06, + "loss": 0.47534761, + "memory(GiB)": 34.67, + "step": 6870, + "train_speed(iter/s)": 1.339063 + }, + { + "acc": 0.9038641, + "epoch": 12.407574391343553, + "grad_norm": 2.8679557193192022, + "learning_rate": 3.4479082934198378e-06, + "loss": 0.50212803, + "memory(GiB)": 34.67, + "step": 6875, + "train_speed(iter/s)": 1.332421 + }, + { + "acc": 0.9250123, + "epoch": 12.416591523895402, + "grad_norm": 3.3074929546790988, + "learning_rate": 3.4408181451420184e-06, + "loss": 0.40797939, + "memory(GiB)": 34.67, + "step": 6880, + "train_speed(iter/s)": 1.325851 + }, + { + "acc": 0.87166634, + "epoch": 12.425608656447249, + "grad_norm": 2.8634096224537005, + "learning_rate": 3.4337314702175056e-06, + "loss": 0.62051229, + "memory(GiB)": 34.67, + "step": 6885, + "train_speed(iter/s)": 1.319368 + }, + { + "acc": 0.89600964, + "epoch": 12.434625788999098, + "grad_norm": 3.5263444180073056, + "learning_rate": 3.426648284428063e-06, + "loss": 0.55068827, + "memory(GiB)": 34.67, + "step": 6890, + "train_speed(iter/s)": 1.312945 + }, + { + "acc": 0.90047712, + "epoch": 12.443642921550946, + "grad_norm": 2.5930249467829225, + "learning_rate": 3.419568603547678e-06, + "loss": 0.49448376, + "memory(GiB)": 34.67, + "step": 6895, + "train_speed(iter/s)": 1.306577 + }, + { + "acc": 0.92914753, + "epoch": 12.452660054102795, + "grad_norm": 2.24287860956484, + "learning_rate": 3.412492443342538e-06, + "loss": 0.39698575, + "memory(GiB)": 34.67, + "step": 6900, + "train_speed(iter/s)": 1.300308 + }, + { + "acc": 0.88429451, + "epoch": 12.461677186654644, + "grad_norm": 4.0189329106208795, + "learning_rate": 3.405419819570986e-06, + "loss": 0.58602076, + "memory(GiB)": 34.67, + "step": 6905, + "train_speed(iter/s)": 1.294077 + }, + { + "acc": 0.89383793, + "epoch": 12.470694319206492, + "grad_norm": 4.231128706089549, + "learning_rate": 3.398350747983493e-06, + "loss": 0.57240362, + "memory(GiB)": 34.67, + "step": 6910, + "train_speed(iter/s)": 1.287912 + }, + { + "acc": 0.91105804, + "epoch": 12.479711451758341, + "grad_norm": 2.676913257049845, + "learning_rate": 3.391285244322616e-06, + "loss": 0.47283764, + "memory(GiB)": 34.67, + "step": 6915, + "train_speed(iter/s)": 1.281821 + }, + { + "acc": 0.9111577, + "epoch": 12.48872858431019, + "grad_norm": 1.7914679032768803, + "learning_rate": 3.38422332432297e-06, + "loss": 0.4607234, + "memory(GiB)": 34.67, + "step": 6920, + "train_speed(iter/s)": 1.275814 + }, + { + "acc": 0.8947032, + "epoch": 12.497745716862038, + "grad_norm": 2.08201057362077, + "learning_rate": 3.377165003711185e-06, + "loss": 0.55225325, + "memory(GiB)": 34.67, + "step": 6925, + "train_speed(iter/s)": 1.269869 + }, + { + "acc": 0.90196323, + "epoch": 12.506762849413887, + "grad_norm": 2.3669301135549947, + "learning_rate": 3.37011029820588e-06, + "loss": 0.48145366, + "memory(GiB)": 34.67, + "step": 6930, + "train_speed(iter/s)": 1.263993 + }, + { + "acc": 0.88760262, + "epoch": 12.515779981965736, + "grad_norm": 3.5300593443343566, + "learning_rate": 3.36305922351762e-06, + "loss": 0.61123838, + "memory(GiB)": 34.67, + "step": 6935, + "train_speed(iter/s)": 1.258166 + }, + { + "acc": 0.89373741, + "epoch": 12.524797114517582, + "grad_norm": 2.9678437185920052, + "learning_rate": 3.3560117953488847e-06, + "loss": 0.54196587, + "memory(GiB)": 34.67, + "step": 6940, + "train_speed(iter/s)": 1.252408 + }, + { + "acc": 0.90031862, + "epoch": 12.533814247069431, + "grad_norm": 2.9632955771816594, + "learning_rate": 3.348968029394035e-06, + "loss": 0.52354126, + "memory(GiB)": 34.67, + "step": 6945, + "train_speed(iter/s)": 1.246707 + }, + { + "acc": 0.91497507, + "epoch": 12.54283137962128, + "grad_norm": 3.8620935294366237, + "learning_rate": 3.3419279413392737e-06, + "loss": 0.45280676, + "memory(GiB)": 34.67, + "step": 6950, + "train_speed(iter/s)": 1.241069 + }, + { + "acc": 0.90571871, + "epoch": 12.551848512173128, + "grad_norm": 2.2996013995351725, + "learning_rate": 3.334891546862616e-06, + "loss": 0.5276722, + "memory(GiB)": 34.67, + "step": 6955, + "train_speed(iter/s)": 1.235494 + }, + { + "acc": 0.90228748, + "epoch": 12.560865644724977, + "grad_norm": 2.2964152470744357, + "learning_rate": 3.327858861633847e-06, + "loss": 0.5086122, + "memory(GiB)": 34.67, + "step": 6960, + "train_speed(iter/s)": 1.229959 + }, + { + "acc": 0.91079998, + "epoch": 12.569882777276826, + "grad_norm": 3.6548937928907845, + "learning_rate": 3.3208299013144974e-06, + "loss": 0.49442649, + "memory(GiB)": 34.67, + "step": 6965, + "train_speed(iter/s)": 1.224506 + }, + { + "acc": 0.89158049, + "epoch": 12.578899909828674, + "grad_norm": 2.8246531241605957, + "learning_rate": 3.3138046815577973e-06, + "loss": 0.55396352, + "memory(GiB)": 34.67, + "step": 6970, + "train_speed(iter/s)": 1.219097 + }, + { + "acc": 0.90056505, + "epoch": 12.587917042380523, + "grad_norm": 2.947643468854448, + "learning_rate": 3.3067832180086513e-06, + "loss": 0.52505016, + "memory(GiB)": 34.67, + "step": 6975, + "train_speed(iter/s)": 1.213741 + }, + { + "acc": 0.90709219, + "epoch": 12.596934174932372, + "grad_norm": 2.872902939668548, + "learning_rate": 3.2997655263035942e-06, + "loss": 0.49564762, + "memory(GiB)": 34.67, + "step": 6980, + "train_speed(iter/s)": 1.208446 + }, + { + "acc": 0.91496458, + "epoch": 12.60595130748422, + "grad_norm": 2.7179826260252256, + "learning_rate": 3.2927516220707667e-06, + "loss": 0.46064801, + "memory(GiB)": 34.67, + "step": 6985, + "train_speed(iter/s)": 1.203193 + }, + { + "acc": 0.89439774, + "epoch": 12.614968440036069, + "grad_norm": 2.5661195393731386, + "learning_rate": 3.285741520929869e-06, + "loss": 0.52741041, + "memory(GiB)": 34.67, + "step": 6990, + "train_speed(iter/s)": 1.197989 + }, + { + "acc": 0.91784515, + "epoch": 12.623985572587918, + "grad_norm": 3.036083624872931, + "learning_rate": 3.2787352384921335e-06, + "loss": 0.45988255, + "memory(GiB)": 34.67, + "step": 6995, + "train_speed(iter/s)": 1.192842 + }, + { + "acc": 0.89744816, + "epoch": 12.633002705139766, + "grad_norm": 2.6472984324447726, + "learning_rate": 3.271732790360295e-06, + "loss": 0.500772, + "memory(GiB)": 34.67, + "step": 7000, + "train_speed(iter/s)": 1.187755 + }, + { + "acc": 0.91127987, + "epoch": 12.642019837691613, + "grad_norm": 2.4186789506546047, + "learning_rate": 3.264734192128542e-06, + "loss": 0.45138502, + "memory(GiB)": 34.67, + "step": 7005, + "train_speed(iter/s)": 1.182709 + }, + { + "acc": 0.91185627, + "epoch": 12.651036970243462, + "grad_norm": 1.9607647456441446, + "learning_rate": 3.2577394593824894e-06, + "loss": 0.46712923, + "memory(GiB)": 34.67, + "step": 7010, + "train_speed(iter/s)": 1.177712 + }, + { + "acc": 0.90184002, + "epoch": 12.66005410279531, + "grad_norm": 2.9091787295472407, + "learning_rate": 3.2507486076991506e-06, + "loss": 0.53244228, + "memory(GiB)": 34.67, + "step": 7015, + "train_speed(iter/s)": 1.172769 + }, + { + "acc": 0.89695921, + "epoch": 12.66907123534716, + "grad_norm": 2.4308655171528293, + "learning_rate": 3.2437616526468867e-06, + "loss": 0.49879436, + "memory(GiB)": 34.67, + "step": 7020, + "train_speed(iter/s)": 1.167865 + }, + { + "acc": 0.91786423, + "epoch": 12.678088367899008, + "grad_norm": 2.071317989419375, + "learning_rate": 3.236778609785389e-06, + "loss": 0.45005484, + "memory(GiB)": 34.67, + "step": 7025, + "train_speed(iter/s)": 1.163017 + }, + { + "acc": 0.92142124, + "epoch": 12.687105500450857, + "grad_norm": 5.121700176171818, + "learning_rate": 3.2297994946656324e-06, + "loss": 0.44252462, + "memory(GiB)": 34.67, + "step": 7030, + "train_speed(iter/s)": 1.158207 + }, + { + "acc": 0.90308228, + "epoch": 12.696122633002705, + "grad_norm": 3.0897040621180554, + "learning_rate": 3.222824322829846e-06, + "loss": 0.52320132, + "memory(GiB)": 34.67, + "step": 7035, + "train_speed(iter/s)": 1.153452 + }, + { + "acc": 0.90803566, + "epoch": 12.705139765554554, + "grad_norm": 2.6214822502930555, + "learning_rate": 3.2158531098114775e-06, + "loss": 0.45875521, + "memory(GiB)": 34.67, + "step": 7040, + "train_speed(iter/s)": 1.148741 + }, + { + "acc": 0.93123388, + "epoch": 12.714156898106403, + "grad_norm": 2.761663377941063, + "learning_rate": 3.208885871135159e-06, + "loss": 0.36535487, + "memory(GiB)": 34.67, + "step": 7045, + "train_speed(iter/s)": 1.144068 + }, + { + "acc": 0.91526604, + "epoch": 12.723174030658251, + "grad_norm": 3.5710697229953556, + "learning_rate": 3.2019226223166682e-06, + "loss": 0.45317073, + "memory(GiB)": 34.67, + "step": 7050, + "train_speed(iter/s)": 1.139451 + }, + { + "acc": 0.90850372, + "epoch": 12.7321911632101, + "grad_norm": 3.0884367350937154, + "learning_rate": 3.194963378862904e-06, + "loss": 0.5030972, + "memory(GiB)": 34.67, + "step": 7055, + "train_speed(iter/s)": 1.134869 + }, + { + "acc": 0.89610424, + "epoch": 12.741208295761947, + "grad_norm": 2.479456654265497, + "learning_rate": 3.1880081562718397e-06, + "loss": 0.52796822, + "memory(GiB)": 34.67, + "step": 7060, + "train_speed(iter/s)": 1.130343 + }, + { + "acc": 0.92005167, + "epoch": 12.750225428313795, + "grad_norm": 1.9885855087772923, + "learning_rate": 3.1810569700324984e-06, + "loss": 0.4264123, + "memory(GiB)": 34.67, + "step": 7065, + "train_speed(iter/s)": 1.125857 + }, + { + "acc": 0.9267272, + "epoch": 12.759242560865644, + "grad_norm": 2.6471495841215082, + "learning_rate": 3.1741098356249096e-06, + "loss": 0.40731611, + "memory(GiB)": 34.67, + "step": 7070, + "train_speed(iter/s)": 1.12141 + }, + { + "acc": 0.91518879, + "epoch": 12.768259693417493, + "grad_norm": 1.9664739870788763, + "learning_rate": 3.167166768520085e-06, + "loss": 0.43133411, + "memory(GiB)": 34.67, + "step": 7075, + "train_speed(iter/s)": 1.116999 + }, + { + "acc": 0.90235577, + "epoch": 12.777276825969341, + "grad_norm": 3.236070038759275, + "learning_rate": 3.1602277841799765e-06, + "loss": 0.51252337, + "memory(GiB)": 34.67, + "step": 7080, + "train_speed(iter/s)": 1.112465 + }, + { + "acc": 0.90878696, + "epoch": 12.78629395852119, + "grad_norm": 2.531316943039322, + "learning_rate": 3.1532928980574405e-06, + "loss": 0.48896437, + "memory(GiB)": 34.67, + "step": 7085, + "train_speed(iter/s)": 1.108021 + }, + { + "acc": 0.90052872, + "epoch": 12.795311091073039, + "grad_norm": 2.6102614878041304, + "learning_rate": 3.1463621255962123e-06, + "loss": 0.56132298, + "memory(GiB)": 34.67, + "step": 7090, + "train_speed(iter/s)": 1.103742 + }, + { + "acc": 0.9197155, + "epoch": 12.804328223624887, + "grad_norm": 2.0247956504089393, + "learning_rate": 3.1394354822308628e-06, + "loss": 0.41660166, + "memory(GiB)": 34.67, + "step": 7095, + "train_speed(iter/s)": 1.099499 + }, + { + "acc": 0.91555653, + "epoch": 12.813345356176736, + "grad_norm": 2.5010295438945183, + "learning_rate": 3.1325129833867686e-06, + "loss": 0.45076404, + "memory(GiB)": 34.67, + "step": 7100, + "train_speed(iter/s)": 1.095289 + }, + { + "acc": 0.90210075, + "epoch": 12.822362488728585, + "grad_norm": 2.599043666518721, + "learning_rate": 3.125594644480074e-06, + "loss": 0.52382908, + "memory(GiB)": 34.67, + "step": 7105, + "train_speed(iter/s)": 1.091122 + }, + { + "acc": 0.90552273, + "epoch": 12.831379621280433, + "grad_norm": 2.1825629567923297, + "learning_rate": 3.118680480917664e-06, + "loss": 0.4995965, + "memory(GiB)": 34.67, + "step": 7110, + "train_speed(iter/s)": 1.086986 + }, + { + "acc": 0.90266972, + "epoch": 12.840396753832282, + "grad_norm": 3.4355316990646596, + "learning_rate": 3.111770508097125e-06, + "loss": 0.49682536, + "memory(GiB)": 34.67, + "step": 7115, + "train_speed(iter/s)": 1.082889 + }, + { + "acc": 0.90070534, + "epoch": 12.84941388638413, + "grad_norm": 2.604672639680177, + "learning_rate": 3.104864741406704e-06, + "loss": 0.48613911, + "memory(GiB)": 34.67, + "step": 7120, + "train_speed(iter/s)": 1.078822 + }, + { + "acc": 0.92008801, + "epoch": 12.858431018935978, + "grad_norm": 3.6092092728467233, + "learning_rate": 3.0979631962252906e-06, + "loss": 0.45216789, + "memory(GiB)": 34.67, + "step": 7125, + "train_speed(iter/s)": 1.074797 + }, + { + "acc": 0.90443535, + "epoch": 12.867448151487826, + "grad_norm": 2.791672773984579, + "learning_rate": 3.091065887922364e-06, + "loss": 0.50383472, + "memory(GiB)": 34.67, + "step": 7130, + "train_speed(iter/s)": 1.070818 + }, + { + "acc": 0.89264803, + "epoch": 12.876465284039675, + "grad_norm": 4.007330384306319, + "learning_rate": 3.0841728318579744e-06, + "loss": 0.55332375, + "memory(GiB)": 34.67, + "step": 7135, + "train_speed(iter/s)": 1.066878 + }, + { + "acc": 0.91145496, + "epoch": 12.885482416591524, + "grad_norm": 2.5195743939618462, + "learning_rate": 3.0772840433827026e-06, + "loss": 0.45711875, + "memory(GiB)": 34.67, + "step": 7140, + "train_speed(iter/s)": 1.062969 + }, + { + "acc": 0.90285149, + "epoch": 12.894499549143372, + "grad_norm": 4.04052031719566, + "learning_rate": 3.07039953783762e-06, + "loss": 0.47453213, + "memory(GiB)": 34.67, + "step": 7145, + "train_speed(iter/s)": 1.059088 + }, + { + "acc": 0.91616383, + "epoch": 12.903516681695221, + "grad_norm": 2.5113154832654367, + "learning_rate": 3.063519330554266e-06, + "loss": 0.45489578, + "memory(GiB)": 34.67, + "step": 7150, + "train_speed(iter/s)": 1.055244 + }, + { + "acc": 0.89737759, + "epoch": 12.91253381424707, + "grad_norm": 2.211501042980457, + "learning_rate": 3.0566434368546043e-06, + "loss": 0.5210609, + "memory(GiB)": 34.67, + "step": 7155, + "train_speed(iter/s)": 1.051417 + }, + { + "acc": 0.90460377, + "epoch": 12.921550946798918, + "grad_norm": 3.1140805661653372, + "learning_rate": 3.049771872050995e-06, + "loss": 0.50720844, + "memory(GiB)": 34.67, + "step": 7160, + "train_speed(iter/s)": 1.047636 + }, + { + "acc": 0.90584793, + "epoch": 12.930568079350767, + "grad_norm": 3.407546548366909, + "learning_rate": 3.042904651446154e-06, + "loss": 0.48453746, + "memory(GiB)": 34.67, + "step": 7165, + "train_speed(iter/s)": 1.043889 + }, + { + "acc": 0.91362514, + "epoch": 12.939585211902616, + "grad_norm": 2.5606100097716453, + "learning_rate": 3.036041790333129e-06, + "loss": 0.48806229, + "memory(GiB)": 34.67, + "step": 7170, + "train_speed(iter/s)": 1.04017 + }, + { + "acc": 0.90656824, + "epoch": 12.948602344454464, + "grad_norm": 2.405704397584747, + "learning_rate": 3.029183303995252e-06, + "loss": 0.52617874, + "memory(GiB)": 34.67, + "step": 7175, + "train_speed(iter/s)": 1.036483 + }, + { + "acc": 0.91279545, + "epoch": 12.957619477006311, + "grad_norm": 2.087216576180259, + "learning_rate": 3.02232920770612e-06, + "loss": 0.45612497, + "memory(GiB)": 34.67, + "step": 7180, + "train_speed(iter/s)": 1.03282 + }, + { + "acc": 0.91408367, + "epoch": 12.96663660955816, + "grad_norm": 2.327322093960309, + "learning_rate": 3.0154795167295457e-06, + "loss": 0.48596373, + "memory(GiB)": 34.67, + "step": 7185, + "train_speed(iter/s)": 1.029196 + }, + { + "acc": 0.91417637, + "epoch": 12.975653742110008, + "grad_norm": 2.8950991414330547, + "learning_rate": 3.0086342463195396e-06, + "loss": 0.46941252, + "memory(GiB)": 34.67, + "step": 7190, + "train_speed(iter/s)": 1.025598 + }, + { + "acc": 0.91168137, + "epoch": 12.984670874661857, + "grad_norm": 2.4907450010736816, + "learning_rate": 3.0017934117202598e-06, + "loss": 0.48299589, + "memory(GiB)": 34.67, + "step": 7195, + "train_speed(iter/s)": 1.02203 + }, + { + "acc": 0.91001816, + "epoch": 12.993688007213706, + "grad_norm": 3.0457935298445222, + "learning_rate": 2.994957028165992e-06, + "loss": 0.44837413, + "memory(GiB)": 34.67, + "step": 7200, + "train_speed(iter/s)": 1.018503 + }, + { + "epoch": 13.0, + "eval_acc": 0.9025353283458022, + "eval_loss": 0.25556719303131104, + "eval_runtime": 49.9473, + "eval_samples_per_second": 9.35, + "eval_steps_per_second": 1.181, + "step": 7204 + }, + { + "acc": 0.83156776, + "epoch": 13.001803426510369, + "grad_norm": 1.910434044119821, + "learning_rate": 2.9894911363060965e-06, + "loss": 0.37572508, + "memory(GiB)": 34.67, + "step": 7205, + "train_speed(iter/s)": 1.001063 + }, + { + "acc": 0.91135616, + "epoch": 13.010820559062218, + "grad_norm": 2.9010833977222426, + "learning_rate": 2.9826628029915806e-06, + "loss": 0.45042601, + "memory(GiB)": 34.67, + "step": 7210, + "train_speed(iter/s)": 0.997696 + }, + { + "acc": 0.91050386, + "epoch": 13.019837691614066, + "grad_norm": 2.8666784763643864, + "learning_rate": 2.9758389633252293e-06, + "loss": 0.47643499, + "memory(GiB)": 34.67, + "step": 7215, + "train_speed(iter/s)": 0.994355 + }, + { + "acc": 0.9150609, + "epoch": 13.028854824165915, + "grad_norm": 2.6840022848349894, + "learning_rate": 2.969019632503477e-06, + "loss": 0.44145088, + "memory(GiB)": 34.67, + "step": 7220, + "train_speed(iter/s)": 0.99104 + }, + { + "acc": 0.93762083, + "epoch": 13.037871956717764, + "grad_norm": 3.160010569776109, + "learning_rate": 2.9622048257127204e-06, + "loss": 0.36134422, + "memory(GiB)": 34.67, + "step": 7225, + "train_speed(iter/s)": 0.98775 + }, + { + "acc": 0.910851, + "epoch": 13.046889089269612, + "grad_norm": 2.27660631793261, + "learning_rate": 2.9553945581292793e-06, + "loss": 0.46951542, + "memory(GiB)": 34.67, + "step": 7230, + "train_speed(iter/s)": 0.984481 + }, + { + "acc": 0.90577393, + "epoch": 13.055906221821461, + "grad_norm": 2.3871135155572634, + "learning_rate": 2.948588844919364e-06, + "loss": 0.4867784, + "memory(GiB)": 34.67, + "step": 7235, + "train_speed(iter/s)": 0.981239 + }, + { + "acc": 0.92203779, + "epoch": 13.06492335437331, + "grad_norm": 3.545670641228415, + "learning_rate": 2.941787701239047e-06, + "loss": 0.41522141, + "memory(GiB)": 34.67, + "step": 7240, + "train_speed(iter/s)": 0.978024 + }, + { + "acc": 0.91149569, + "epoch": 13.073940486925158, + "grad_norm": 2.367381737816561, + "learning_rate": 2.93499114223422e-06, + "loss": 0.46987023, + "memory(GiB)": 34.67, + "step": 7245, + "train_speed(iter/s)": 0.974823 + }, + { + "acc": 0.93819427, + "epoch": 13.082957619477007, + "grad_norm": 1.7155186199853616, + "learning_rate": 2.9281991830405672e-06, + "loss": 0.35313714, + "memory(GiB)": 34.67, + "step": 7250, + "train_speed(iter/s)": 0.971652 + }, + { + "acc": 0.91345415, + "epoch": 13.091974752028856, + "grad_norm": 3.006464523365056, + "learning_rate": 2.921411838783525e-06, + "loss": 0.45448046, + "memory(GiB)": 34.67, + "step": 7255, + "train_speed(iter/s)": 0.968508 + }, + { + "acc": 0.90963287, + "epoch": 13.100991884580703, + "grad_norm": 3.396531814582047, + "learning_rate": 2.91462912457826e-06, + "loss": 0.44586344, + "memory(GiB)": 34.67, + "step": 7260, + "train_speed(iter/s)": 0.965395 + }, + { + "acc": 0.90684223, + "epoch": 13.110009017132551, + "grad_norm": 4.290854237947542, + "learning_rate": 2.907851055529622e-06, + "loss": 0.48749828, + "memory(GiB)": 34.67, + "step": 7265, + "train_speed(iter/s)": 0.962299 + }, + { + "acc": 0.93061886, + "epoch": 13.1190261496844, + "grad_norm": 2.0506955335973585, + "learning_rate": 2.90107764673212e-06, + "loss": 0.38269024, + "memory(GiB)": 34.67, + "step": 7270, + "train_speed(iter/s)": 0.959232 + }, + { + "acc": 0.91307583, + "epoch": 13.128043282236249, + "grad_norm": 2.6600976152068845, + "learning_rate": 2.8943089132698787e-06, + "loss": 0.43937092, + "memory(GiB)": 34.67, + "step": 7275, + "train_speed(iter/s)": 0.956189 + }, + { + "acc": 0.90629559, + "epoch": 13.137060414788097, + "grad_norm": 4.0580275056400605, + "learning_rate": 2.8875448702166175e-06, + "loss": 0.48432384, + "memory(GiB)": 34.67, + "step": 7280, + "train_speed(iter/s)": 0.953163 + }, + { + "acc": 0.90866852, + "epoch": 13.146077547339946, + "grad_norm": 2.7017420227692446, + "learning_rate": 2.8807855326356072e-06, + "loss": 0.4423574, + "memory(GiB)": 34.67, + "step": 7285, + "train_speed(iter/s)": 0.950164 + }, + { + "acc": 0.90365028, + "epoch": 13.155094679891794, + "grad_norm": 3.5664870496977796, + "learning_rate": 2.8740309155796436e-06, + "loss": 0.5123939, + "memory(GiB)": 34.67, + "step": 7290, + "train_speed(iter/s)": 0.947182 + }, + { + "acc": 0.90207653, + "epoch": 13.164111812443643, + "grad_norm": 3.79761429823838, + "learning_rate": 2.8672810340910006e-06, + "loss": 0.49108753, + "memory(GiB)": 34.67, + "step": 7295, + "train_speed(iter/s)": 0.944231 + }, + { + "acc": 0.92445564, + "epoch": 13.173128944995492, + "grad_norm": 3.2696712828467405, + "learning_rate": 2.860535903201418e-06, + "loss": 0.43105054, + "memory(GiB)": 34.67, + "step": 7300, + "train_speed(iter/s)": 0.941248 + }, + { + "acc": 0.91931086, + "epoch": 13.18214607754734, + "grad_norm": 3.8005889241331046, + "learning_rate": 2.8537955379320474e-06, + "loss": 0.44570789, + "memory(GiB)": 34.67, + "step": 7305, + "train_speed(iter/s)": 0.938165 + }, + { + "acc": 0.91292334, + "epoch": 13.19116321009919, + "grad_norm": 4.480845662924485, + "learning_rate": 2.8470599532934322e-06, + "loss": 0.42035322, + "memory(GiB)": 34.67, + "step": 7310, + "train_speed(iter/s)": 0.935114 + }, + { + "acc": 0.92798328, + "epoch": 13.200180342651038, + "grad_norm": 2.614446117175046, + "learning_rate": 2.84032916428547e-06, + "loss": 0.37440386, + "memory(GiB)": 34.67, + "step": 7315, + "train_speed(iter/s)": 0.932254 + }, + { + "acc": 0.91777925, + "epoch": 13.209197475202885, + "grad_norm": 2.4629655248966484, + "learning_rate": 2.8336031858973733e-06, + "loss": 0.43911977, + "memory(GiB)": 34.67, + "step": 7320, + "train_speed(iter/s)": 0.929411 + }, + { + "acc": 0.91801252, + "epoch": 13.218214607754733, + "grad_norm": 2.4837495393468174, + "learning_rate": 2.8268820331076464e-06, + "loss": 0.4110014, + "memory(GiB)": 34.67, + "step": 7325, + "train_speed(iter/s)": 0.926584 + }, + { + "acc": 0.92134676, + "epoch": 13.227231740306582, + "grad_norm": 2.4032660248792648, + "learning_rate": 2.8201657208840456e-06, + "loss": 0.42779732, + "memory(GiB)": 34.67, + "step": 7330, + "train_speed(iter/s)": 0.923779 + }, + { + "acc": 0.90932083, + "epoch": 13.23624887285843, + "grad_norm": 3.394816327004731, + "learning_rate": 2.8134542641835507e-06, + "loss": 0.44342723, + "memory(GiB)": 34.67, + "step": 7335, + "train_speed(iter/s)": 0.921001 + }, + { + "acc": 0.92459488, + "epoch": 13.24526600541028, + "grad_norm": 2.1125145267494436, + "learning_rate": 2.8067476779523206e-06, + "loss": 0.40396466, + "memory(GiB)": 34.67, + "step": 7340, + "train_speed(iter/s)": 0.918242 + }, + { + "acc": 0.92494678, + "epoch": 13.254283137962128, + "grad_norm": 2.646012026414015, + "learning_rate": 2.8000459771256757e-06, + "loss": 0.37966986, + "memory(GiB)": 34.67, + "step": 7345, + "train_speed(iter/s)": 0.915503 + }, + { + "acc": 0.91678486, + "epoch": 13.263300270513977, + "grad_norm": 2.642400969521799, + "learning_rate": 2.793349176628051e-06, + "loss": 0.41771069, + "memory(GiB)": 34.67, + "step": 7350, + "train_speed(iter/s)": 0.912783 + }, + { + "acc": 0.91658487, + "epoch": 13.272317403065825, + "grad_norm": 2.281304435325576, + "learning_rate": 2.7866572913729763e-06, + "loss": 0.46291165, + "memory(GiB)": 34.67, + "step": 7355, + "train_speed(iter/s)": 0.910079 + }, + { + "acc": 0.91275826, + "epoch": 13.281334535617674, + "grad_norm": 2.70930507000483, + "learning_rate": 2.7799703362630258e-06, + "loss": 0.458354, + "memory(GiB)": 34.67, + "step": 7360, + "train_speed(iter/s)": 0.907393 + }, + { + "acc": 0.91499748, + "epoch": 13.290351668169523, + "grad_norm": 3.7940222227937754, + "learning_rate": 2.773288326189798e-06, + "loss": 0.47010036, + "memory(GiB)": 34.67, + "step": 7365, + "train_speed(iter/s)": 0.904731 + }, + { + "acc": 0.92498779, + "epoch": 13.299368800721371, + "grad_norm": 2.982398889619566, + "learning_rate": 2.7666112760338853e-06, + "loss": 0.43736539, + "memory(GiB)": 34.67, + "step": 7370, + "train_speed(iter/s)": 0.902095 + }, + { + "acc": 0.91043816, + "epoch": 13.30838593327322, + "grad_norm": 2.0679015274794486, + "learning_rate": 2.7599392006648285e-06, + "loss": 0.46530938, + "memory(GiB)": 34.67, + "step": 7375, + "train_speed(iter/s)": 0.899467 + }, + { + "acc": 0.92218437, + "epoch": 13.317403065825067, + "grad_norm": 1.8848611174016012, + "learning_rate": 2.753272114941087e-06, + "loss": 0.4195118, + "memory(GiB)": 34.67, + "step": 7380, + "train_speed(iter/s)": 0.896868 + }, + { + "acc": 0.93011036, + "epoch": 13.326420198376915, + "grad_norm": 2.7178073478219082, + "learning_rate": 2.746610033710013e-06, + "loss": 0.36692064, + "memory(GiB)": 34.67, + "step": 7385, + "train_speed(iter/s)": 0.894284 + }, + { + "acc": 0.90709066, + "epoch": 13.335437330928764, + "grad_norm": 2.56576319099396, + "learning_rate": 2.7399529718078143e-06, + "loss": 0.50453234, + "memory(GiB)": 34.67, + "step": 7390, + "train_speed(iter/s)": 0.891719 + }, + { + "acc": 0.90788584, + "epoch": 13.344454463480613, + "grad_norm": 2.340105644715265, + "learning_rate": 2.7333009440595216e-06, + "loss": 0.50700455, + "memory(GiB)": 34.67, + "step": 7395, + "train_speed(iter/s)": 0.889171 + }, + { + "acc": 0.90255871, + "epoch": 13.353471596032461, + "grad_norm": 2.7141721941934924, + "learning_rate": 2.726653965278947e-06, + "loss": 0.46706529, + "memory(GiB)": 34.67, + "step": 7400, + "train_speed(iter/s)": 0.886633 + }, + { + "acc": 0.92647762, + "epoch": 13.36248872858431, + "grad_norm": 4.133163480197946, + "learning_rate": 2.720012050268667e-06, + "loss": 0.39142969, + "memory(GiB)": 34.67, + "step": 7405, + "train_speed(iter/s)": 0.884115 + }, + { + "acc": 0.91706448, + "epoch": 13.371505861136159, + "grad_norm": 2.8733374569819206, + "learning_rate": 2.713375213819978e-06, + "loss": 0.4484066, + "memory(GiB)": 34.67, + "step": 7410, + "train_speed(iter/s)": 0.881621 + }, + { + "acc": 0.89831924, + "epoch": 13.380522993688007, + "grad_norm": 3.7669824367312152, + "learning_rate": 2.7067434707128708e-06, + "loss": 0.49606962, + "memory(GiB)": 34.67, + "step": 7415, + "train_speed(iter/s)": 0.879144 + }, + { + "acc": 0.89532213, + "epoch": 13.389540126239856, + "grad_norm": 3.3646782634934875, + "learning_rate": 2.7001168357159846e-06, + "loss": 0.49138098, + "memory(GiB)": 34.67, + "step": 7420, + "train_speed(iter/s)": 0.876686 + }, + { + "acc": 0.92779675, + "epoch": 13.398557258791705, + "grad_norm": 2.9373135278479396, + "learning_rate": 2.69349532358659e-06, + "loss": 0.38355405, + "memory(GiB)": 34.67, + "step": 7425, + "train_speed(iter/s)": 0.874246 + }, + { + "acc": 0.92377272, + "epoch": 13.407574391343553, + "grad_norm": 2.9765340958395754, + "learning_rate": 2.686878949070549e-06, + "loss": 0.41119652, + "memory(GiB)": 34.67, + "step": 7430, + "train_speed(iter/s)": 0.87182 + }, + { + "acc": 0.91067591, + "epoch": 13.416591523895402, + "grad_norm": 3.705403956327573, + "learning_rate": 2.6802677269022815e-06, + "loss": 0.45417252, + "memory(GiB)": 34.67, + "step": 7435, + "train_speed(iter/s)": 0.869415 + }, + { + "acc": 0.92136078, + "epoch": 13.425608656447249, + "grad_norm": 3.2789817249364375, + "learning_rate": 2.6736616718047286e-06, + "loss": 0.40200434, + "memory(GiB)": 34.67, + "step": 7440, + "train_speed(iter/s)": 0.867017 + }, + { + "acc": 0.91240864, + "epoch": 13.434625788999098, + "grad_norm": 2.2247812089028596, + "learning_rate": 2.6670607984893315e-06, + "loss": 0.46096272, + "memory(GiB)": 34.67, + "step": 7445, + "train_speed(iter/s)": 0.864643 + }, + { + "acc": 0.90765324, + "epoch": 13.443642921550946, + "grad_norm": 2.5340394179146974, + "learning_rate": 2.6604651216559887e-06, + "loss": 0.41848903, + "memory(GiB)": 34.67, + "step": 7450, + "train_speed(iter/s)": 0.862279 + }, + { + "acc": 0.90695438, + "epoch": 13.452660054102795, + "grad_norm": 2.563710193611515, + "learning_rate": 2.653874655993026e-06, + "loss": 0.49381599, + "memory(GiB)": 34.67, + "step": 7455, + "train_speed(iter/s)": 0.859933 + }, + { + "acc": 0.91303482, + "epoch": 13.461677186654644, + "grad_norm": 3.047339345762434, + "learning_rate": 2.647289416177167e-06, + "loss": 0.44869227, + "memory(GiB)": 34.67, + "step": 7460, + "train_speed(iter/s)": 0.857597 + }, + { + "acc": 0.92487192, + "epoch": 13.470694319206492, + "grad_norm": 2.1385095794722724, + "learning_rate": 2.64070941687349e-06, + "loss": 0.40038085, + "memory(GiB)": 34.67, + "step": 7465, + "train_speed(iter/s)": 0.855277 + }, + { + "acc": 0.90575371, + "epoch": 13.479711451758341, + "grad_norm": 3.0483696799435225, + "learning_rate": 2.6341346727354113e-06, + "loss": 0.50467086, + "memory(GiB)": 34.67, + "step": 7470, + "train_speed(iter/s)": 0.852982 + }, + { + "acc": 0.91957855, + "epoch": 13.48872858431019, + "grad_norm": 3.6440923105623684, + "learning_rate": 2.62756519840464e-06, + "loss": 0.44822865, + "memory(GiB)": 34.67, + "step": 7475, + "train_speed(iter/s)": 0.850704 + }, + { + "acc": 0.92911835, + "epoch": 13.497745716862038, + "grad_norm": 3.2803904544089213, + "learning_rate": 2.6210010085111507e-06, + "loss": 0.37346206, + "memory(GiB)": 34.67, + "step": 7480, + "train_speed(iter/s)": 0.848441 + }, + { + "acc": 0.91992598, + "epoch": 13.506762849413887, + "grad_norm": 2.254405202090834, + "learning_rate": 2.6144421176731503e-06, + "loss": 0.4058012, + "memory(GiB)": 34.67, + "step": 7485, + "train_speed(iter/s)": 0.846191 + }, + { + "acc": 0.92690392, + "epoch": 13.515779981965736, + "grad_norm": 2.8697510836962388, + "learning_rate": 2.6078885404970415e-06, + "loss": 0.40351672, + "memory(GiB)": 34.67, + "step": 7490, + "train_speed(iter/s)": 0.84395 + }, + { + "acc": 0.91610775, + "epoch": 13.524797114517582, + "grad_norm": 2.051569997394548, + "learning_rate": 2.6013402915774017e-06, + "loss": 0.46603556, + "memory(GiB)": 34.67, + "step": 7495, + "train_speed(iter/s)": 0.84172 + }, + { + "acc": 0.91617641, + "epoch": 13.533814247069431, + "grad_norm": 2.286165118967432, + "learning_rate": 2.59479738549693e-06, + "loss": 0.45086269, + "memory(GiB)": 34.67, + "step": 7500, + "train_speed(iter/s)": 0.839511 + }, + { + "acc": 0.93104916, + "epoch": 13.54283137962128, + "grad_norm": 3.8643444719875646, + "learning_rate": 2.588259836826437e-06, + "loss": 0.37835269, + "memory(GiB)": 34.67, + "step": 7505, + "train_speed(iter/s)": 0.837317 + }, + { + "acc": 0.92141943, + "epoch": 13.551848512173128, + "grad_norm": 2.225645199352156, + "learning_rate": 2.5817276601248016e-06, + "loss": 0.44836111, + "memory(GiB)": 34.67, + "step": 7510, + "train_speed(iter/s)": 0.835139 + }, + { + "acc": 0.9025341, + "epoch": 13.560865644724977, + "grad_norm": 3.4844716672940432, + "learning_rate": 2.5752008699389348e-06, + "loss": 0.50761642, + "memory(GiB)": 34.67, + "step": 7515, + "train_speed(iter/s)": 0.832969 + }, + { + "acc": 0.91783009, + "epoch": 13.569882777276826, + "grad_norm": 3.326678568900906, + "learning_rate": 2.5686794808037574e-06, + "loss": 0.46847334, + "memory(GiB)": 34.67, + "step": 7520, + "train_speed(iter/s)": 0.830819 + }, + { + "acc": 0.91352987, + "epoch": 13.578899909828674, + "grad_norm": 2.8228888062944786, + "learning_rate": 2.5621635072421575e-06, + "loss": 0.46015668, + "memory(GiB)": 34.67, + "step": 7525, + "train_speed(iter/s)": 0.828649 + }, + { + "acc": 0.92407179, + "epoch": 13.587917042380523, + "grad_norm": 3.006678121026572, + "learning_rate": 2.5556529637649653e-06, + "loss": 0.45808477, + "memory(GiB)": 34.67, + "step": 7530, + "train_speed(iter/s)": 0.826424 + }, + { + "acc": 0.92271099, + "epoch": 13.596934174932372, + "grad_norm": 2.3647315636854396, + "learning_rate": 2.5491478648709196e-06, + "loss": 0.40352392, + "memory(GiB)": 34.67, + "step": 7535, + "train_speed(iter/s)": 0.824268 + }, + { + "acc": 0.90452938, + "epoch": 13.60595130748422, + "grad_norm": 2.586311310712336, + "learning_rate": 2.5426482250466338e-06, + "loss": 0.53205042, + "memory(GiB)": 34.67, + "step": 7540, + "train_speed(iter/s)": 0.822175 + }, + { + "acc": 0.91857758, + "epoch": 13.614968440036069, + "grad_norm": 1.7748948002446534, + "learning_rate": 2.5361540587665605e-06, + "loss": 0.42117982, + "memory(GiB)": 34.67, + "step": 7545, + "train_speed(iter/s)": 0.820091 + }, + { + "acc": 0.92564392, + "epoch": 13.623985572587918, + "grad_norm": 2.416666583742134, + "learning_rate": 2.5296653804929667e-06, + "loss": 0.37726135, + "memory(GiB)": 34.67, + "step": 7550, + "train_speed(iter/s)": 0.818016 + }, + { + "acc": 0.90426903, + "epoch": 13.633002705139766, + "grad_norm": 2.4728096245909317, + "learning_rate": 2.5231822046758976e-06, + "loss": 0.51140699, + "memory(GiB)": 34.67, + "step": 7555, + "train_speed(iter/s)": 0.815963 + }, + { + "acc": 0.91746035, + "epoch": 13.642019837691613, + "grad_norm": 2.2490695665177975, + "learning_rate": 2.5167045457531447e-06, + "loss": 0.45246611, + "memory(GiB)": 34.67, + "step": 7560, + "train_speed(iter/s)": 0.813926 + }, + { + "acc": 0.92741585, + "epoch": 13.651036970243462, + "grad_norm": 3.770299354880114, + "learning_rate": 2.510232418150211e-06, + "loss": 0.37998924, + "memory(GiB)": 34.67, + "step": 7565, + "train_speed(iter/s)": 0.811899 + }, + { + "acc": 0.9042696, + "epoch": 13.66005410279531, + "grad_norm": 2.426793152085983, + "learning_rate": 2.503765836280283e-06, + "loss": 0.50290551, + "memory(GiB)": 34.67, + "step": 7570, + "train_speed(iter/s)": 0.809883 + }, + { + "acc": 0.92482624, + "epoch": 13.66907123534716, + "grad_norm": 3.5893481594395875, + "learning_rate": 2.4973048145441984e-06, + "loss": 0.38070757, + "memory(GiB)": 34.67, + "step": 7575, + "train_speed(iter/s)": 0.807877 + }, + { + "acc": 0.90683136, + "epoch": 13.678088367899008, + "grad_norm": 2.4112487872224104, + "learning_rate": 2.490849367330412e-06, + "loss": 0.45160856, + "memory(GiB)": 34.67, + "step": 7580, + "train_speed(iter/s)": 0.805888 + }, + { + "acc": 0.91481924, + "epoch": 13.687105500450857, + "grad_norm": 3.9976426199831074, + "learning_rate": 2.484399509014962e-06, + "loss": 0.44831839, + "memory(GiB)": 34.67, + "step": 7585, + "train_speed(iter/s)": 0.803907 + }, + { + "acc": 0.91878014, + "epoch": 13.696122633002705, + "grad_norm": 2.028143131118245, + "learning_rate": 2.4779552539614447e-06, + "loss": 0.41924124, + "memory(GiB)": 34.67, + "step": 7590, + "train_speed(iter/s)": 0.801939 + }, + { + "acc": 0.91641684, + "epoch": 13.705139765554554, + "grad_norm": 3.8461718844426542, + "learning_rate": 2.4715166165209736e-06, + "loss": 0.42823806, + "memory(GiB)": 34.67, + "step": 7595, + "train_speed(iter/s)": 0.799981 + }, + { + "acc": 0.91030054, + "epoch": 13.714156898106403, + "grad_norm": 2.209311395879964, + "learning_rate": 2.465083611032158e-06, + "loss": 0.47398629, + "memory(GiB)": 34.67, + "step": 7600, + "train_speed(iter/s)": 0.798037 + }, + { + "acc": 0.93113747, + "epoch": 13.723174030658251, + "grad_norm": 2.5553222520456518, + "learning_rate": 2.4586562518210554e-06, + "loss": 0.39157214, + "memory(GiB)": 34.67, + "step": 7605, + "train_speed(iter/s)": 0.79611 + }, + { + "acc": 0.91998997, + "epoch": 13.7321911632101, + "grad_norm": 2.7813427865917446, + "learning_rate": 2.4522345532011583e-06, + "loss": 0.42484126, + "memory(GiB)": 34.67, + "step": 7610, + "train_speed(iter/s)": 0.794195 + }, + { + "acc": 0.91799946, + "epoch": 13.741208295761947, + "grad_norm": 2.565290365377133, + "learning_rate": 2.445818529473349e-06, + "loss": 0.44959812, + "memory(GiB)": 34.67, + "step": 7615, + "train_speed(iter/s)": 0.792289 + }, + { + "acc": 0.93449316, + "epoch": 13.750225428313795, + "grad_norm": 2.7806118998990756, + "learning_rate": 2.4394081949258754e-06, + "loss": 0.36552901, + "memory(GiB)": 34.67, + "step": 7620, + "train_speed(iter/s)": 0.790394 + }, + { + "acc": 0.92006874, + "epoch": 13.759242560865644, + "grad_norm": 2.6335169545111685, + "learning_rate": 2.43300356383431e-06, + "loss": 0.42915936, + "memory(GiB)": 34.67, + "step": 7625, + "train_speed(iter/s)": 0.788513 + }, + { + "acc": 0.91715298, + "epoch": 13.768259693417493, + "grad_norm": 2.6110020416685806, + "learning_rate": 2.4266046504615283e-06, + "loss": 0.43826027, + "memory(GiB)": 34.67, + "step": 7630, + "train_speed(iter/s)": 0.786641 + }, + { + "acc": 0.9005537, + "epoch": 13.777276825969341, + "grad_norm": 2.389448526677138, + "learning_rate": 2.420211469057672e-06, + "loss": 0.49912429, + "memory(GiB)": 34.67, + "step": 7635, + "train_speed(iter/s)": 0.784778 + }, + { + "acc": 0.93391418, + "epoch": 13.78629395852119, + "grad_norm": 2.3303458261589105, + "learning_rate": 2.4138240338601187e-06, + "loss": 0.39306188, + "memory(GiB)": 34.67, + "step": 7640, + "train_speed(iter/s)": 0.782927 + }, + { + "acc": 0.91246166, + "epoch": 13.795311091073039, + "grad_norm": 3.345441834133012, + "learning_rate": 2.407442359093448e-06, + "loss": 0.4463088, + "memory(GiB)": 34.67, + "step": 7645, + "train_speed(iter/s)": 0.781091 + }, + { + "acc": 0.92767334, + "epoch": 13.804328223624887, + "grad_norm": 2.311986915142374, + "learning_rate": 2.40106645896941e-06, + "loss": 0.40542097, + "memory(GiB)": 34.67, + "step": 7650, + "train_speed(iter/s)": 0.779264 + }, + { + "acc": 0.92072134, + "epoch": 13.813345356176736, + "grad_norm": 2.721893786946378, + "learning_rate": 2.394696347686897e-06, + "loss": 0.4374537, + "memory(GiB)": 34.67, + "step": 7655, + "train_speed(iter/s)": 0.777446 + }, + { + "acc": 0.89793825, + "epoch": 13.822362488728585, + "grad_norm": 3.04927251622692, + "learning_rate": 2.388332039431909e-06, + "loss": 0.51242447, + "memory(GiB)": 34.67, + "step": 7660, + "train_speed(iter/s)": 0.775638 + }, + { + "acc": 0.91029463, + "epoch": 13.831379621280433, + "grad_norm": 2.330530114774836, + "learning_rate": 2.381973548377526e-06, + "loss": 0.4661911, + "memory(GiB)": 34.67, + "step": 7665, + "train_speed(iter/s)": 0.773837 + }, + { + "acc": 0.91078424, + "epoch": 13.840396753832282, + "grad_norm": 3.0249717723702103, + "learning_rate": 2.3756208886838657e-06, + "loss": 0.45671978, + "memory(GiB)": 34.67, + "step": 7670, + "train_speed(iter/s)": 0.772046 + }, + { + "acc": 0.92062321, + "epoch": 13.84941388638413, + "grad_norm": 2.3289482832980455, + "learning_rate": 2.3692740744980665e-06, + "loss": 0.45508952, + "memory(GiB)": 34.67, + "step": 7675, + "train_speed(iter/s)": 0.770273 + }, + { + "acc": 0.9151104, + "epoch": 13.858431018935978, + "grad_norm": 1.991707358142325, + "learning_rate": 2.3629331199542472e-06, + "loss": 0.45227852, + "memory(GiB)": 34.67, + "step": 7680, + "train_speed(iter/s)": 0.768512 + }, + { + "acc": 0.92223511, + "epoch": 13.867448151487826, + "grad_norm": 2.8714618075645575, + "learning_rate": 2.3565980391734785e-06, + "loss": 0.41313591, + "memory(GiB)": 34.67, + "step": 7685, + "train_speed(iter/s)": 0.766755 + }, + { + "acc": 0.91029892, + "epoch": 13.876465284039675, + "grad_norm": 3.8192779037514044, + "learning_rate": 2.350268846263745e-06, + "loss": 0.47502518, + "memory(GiB)": 34.67, + "step": 7690, + "train_speed(iter/s)": 0.765014 + }, + { + "acc": 0.90931625, + "epoch": 13.885482416591524, + "grad_norm": 3.6311347936367198, + "learning_rate": 2.3439455553199234e-06, + "loss": 0.52817411, + "memory(GiB)": 34.67, + "step": 7695, + "train_speed(iter/s)": 0.763279 + }, + { + "acc": 0.90909996, + "epoch": 13.894499549143372, + "grad_norm": 3.070435648155786, + "learning_rate": 2.337628180423752e-06, + "loss": 0.47671447, + "memory(GiB)": 34.67, + "step": 7700, + "train_speed(iter/s)": 0.76155 + }, + { + "acc": 0.91981907, + "epoch": 13.903516681695221, + "grad_norm": 2.9136339190019367, + "learning_rate": 2.3313167356437847e-06, + "loss": 0.4204464, + "memory(GiB)": 34.67, + "step": 7705, + "train_speed(iter/s)": 0.759827 + }, + { + "acc": 0.9279768, + "epoch": 13.91253381424707, + "grad_norm": 1.7584487100424862, + "learning_rate": 2.3250112350353738e-06, + "loss": 0.3965879, + "memory(GiB)": 34.67, + "step": 7710, + "train_speed(iter/s)": 0.758123 + }, + { + "acc": 0.92543163, + "epoch": 13.921550946798918, + "grad_norm": 2.074743460894259, + "learning_rate": 2.318711692640635e-06, + "loss": 0.40781312, + "memory(GiB)": 34.67, + "step": 7715, + "train_speed(iter/s)": 0.756432 + }, + { + "acc": 0.92013721, + "epoch": 13.930568079350767, + "grad_norm": 2.391497930161413, + "learning_rate": 2.312418122488414e-06, + "loss": 0.40785055, + "memory(GiB)": 34.67, + "step": 7720, + "train_speed(iter/s)": 0.754747 + }, + { + "acc": 0.90895977, + "epoch": 13.939585211902616, + "grad_norm": 2.282832199655629, + "learning_rate": 2.306130538594259e-06, + "loss": 0.48620806, + "memory(GiB)": 34.67, + "step": 7725, + "train_speed(iter/s)": 0.75307 + }, + { + "acc": 0.92811804, + "epoch": 13.948602344454464, + "grad_norm": 3.2542288268715436, + "learning_rate": 2.2998489549603794e-06, + "loss": 0.36995916, + "memory(GiB)": 34.67, + "step": 7730, + "train_speed(iter/s)": 0.751402 + }, + { + "acc": 0.92376108, + "epoch": 13.957619477006311, + "grad_norm": 1.9798170143802984, + "learning_rate": 2.2935733855756316e-06, + "loss": 0.42149258, + "memory(GiB)": 34.67, + "step": 7735, + "train_speed(iter/s)": 0.749747 + }, + { + "acc": 0.92298889, + "epoch": 13.96663660955816, + "grad_norm": 2.7042116336273865, + "learning_rate": 2.287303844415473e-06, + "loss": 0.40348473, + "memory(GiB)": 34.67, + "step": 7740, + "train_speed(iter/s)": 0.748106 + }, + { + "acc": 0.91418896, + "epoch": 13.975653742110008, + "grad_norm": 2.2087753277719946, + "learning_rate": 2.281040345441941e-06, + "loss": 0.4406374, + "memory(GiB)": 34.67, + "step": 7745, + "train_speed(iter/s)": 0.746471 + }, + { + "acc": 0.90041313, + "epoch": 13.984670874661857, + "grad_norm": 3.272289949082435, + "learning_rate": 2.274782902603609e-06, + "loss": 0.51967297, + "memory(GiB)": 34.67, + "step": 7750, + "train_speed(iter/s)": 0.744826 + }, + { + "acc": 0.91371441, + "epoch": 13.993688007213706, + "grad_norm": 3.8788683819560346, + "learning_rate": 2.2685315298355725e-06, + "loss": 0.48910103, + "memory(GiB)": 34.67, + "step": 7755, + "train_speed(iter/s)": 0.743147 + }, + { + "epoch": 14.0, + "eval_acc": 0.9115752285951787, + "eval_loss": 0.24021737277507782, + "eval_runtime": 49.8939, + "eval_samples_per_second": 9.36, + "eval_steps_per_second": 1.183, + "step": 7759 + } + ], + "logging_steps": 5, + "max_steps": 11080, + "num_input_tokens_seen": 0, + "num_train_epochs": 20, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5105557816049664.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}