{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.10772578890097932,
  "eval_steps": 50,
  "global_step": 99,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001088139281828074,
      "grad_norm": 0.2615509629249573,
      "learning_rate": 5e-05,
      "loss": 0.838,
      "step": 1
    },
    {
      "epoch": 0.001088139281828074,
      "eval_loss": 0.23379258811473846,
      "eval_runtime": 24.7758,
      "eval_samples_per_second": 15.62,
      "eval_steps_per_second": 7.83,
      "step": 1
    },
    {
      "epoch": 0.002176278563656148,
      "grad_norm": 0.27500101923942566,
      "learning_rate": 0.0001,
      "loss": 1.0311,
      "step": 2
    },
    {
      "epoch": 0.003264417845484222,
      "grad_norm": 0.26265749335289,
      "learning_rate": 0.00015,
      "loss": 0.7598,
      "step": 3
    },
    {
      "epoch": 0.004352557127312296,
      "grad_norm": 0.34702393412590027,
      "learning_rate": 0.0002,
      "loss": 0.7643,
      "step": 4
    },
    {
      "epoch": 0.00544069640914037,
      "grad_norm": 0.26868095993995667,
      "learning_rate": 0.00025,
      "loss": 0.8676,
      "step": 5
    },
    {
      "epoch": 0.006528835690968444,
      "grad_norm": 0.6326900124549866,
      "learning_rate": 0.0003,
      "loss": 0.9927,
      "step": 6
    },
    {
      "epoch": 0.007616974972796518,
      "grad_norm": 0.6055320501327515,
      "learning_rate": 0.00035,
      "loss": 0.7319,
      "step": 7
    },
    {
      "epoch": 0.008705114254624592,
      "grad_norm": 0.43092861771583557,
      "learning_rate": 0.0004,
      "loss": 0.7695,
      "step": 8
    },
    {
      "epoch": 0.009793253536452665,
      "grad_norm": 0.40037959814071655,
      "learning_rate": 0.00045000000000000004,
      "loss": 0.7098,
      "step": 9
    },
    {
      "epoch": 0.01088139281828074,
      "grad_norm": 0.39341455698013306,
      "learning_rate": 0.0005,
      "loss": 0.4365,
      "step": 10
    },
    {
      "epoch": 0.011969532100108813,
      "grad_norm": 0.8135687112808228,
      "learning_rate": 0.0004998442655654946,
      "loss": 0.7446,
      "step": 11
    },
    {
      "epoch": 0.013057671381936888,
      "grad_norm": 0.8516698479652405,
      "learning_rate": 0.0004993772562876909,
      "loss": 0.3987,
      "step": 12
    },
    {
      "epoch": 0.014145810663764961,
      "grad_norm": 0.3541325628757477,
      "learning_rate": 0.0004985995540019955,
      "loss": 0.4453,
      "step": 13
    },
    {
      "epoch": 0.015233949945593036,
      "grad_norm": 0.5297847986221313,
      "learning_rate": 0.0004975121276286136,
      "loss": 0.5155,
      "step": 14
    },
    {
      "epoch": 0.01632208922742111,
      "grad_norm": 0.442644327878952,
      "learning_rate": 0.0004961163319653958,
      "loss": 0.4814,
      "step": 15
    },
    {
      "epoch": 0.017410228509249184,
      "grad_norm": 0.4812023341655731,
      "learning_rate": 0.0004944139059999286,
      "loss": 0.5217,
      "step": 16
    },
    {
      "epoch": 0.018498367791077257,
      "grad_norm": 0.4284003674983978,
      "learning_rate": 0.000492406970742972,
      "loss": 0.2965,
      "step": 17
    },
    {
      "epoch": 0.01958650707290533,
      "grad_norm": 0.5950977206230164,
      "learning_rate": 0.0004900980265859448,
      "loss": 0.5757,
      "step": 18
    },
    {
      "epoch": 0.020674646354733407,
      "grad_norm": 0.35087430477142334,
      "learning_rate": 0.0004874899501857477,
      "loss": 0.314,
      "step": 19
    },
    {
      "epoch": 0.02176278563656148,
      "grad_norm": 0.2939762771129608,
      "learning_rate": 0.00048458599088080736,
      "loss": 0.3879,
      "step": 20
    },
    {
      "epoch": 0.022850924918389554,
      "grad_norm": 0.3336440324783325,
      "learning_rate": 0.0004813897666428053,
      "loss": 0.3425,
      "step": 21
    },
    {
      "epoch": 0.023939064200217627,
      "grad_norm": 0.5165703296661377,
      "learning_rate": 0.00047790525956913543,
      "loss": 0.4142,
      "step": 22
    },
    {
      "epoch": 0.025027203482045703,
      "grad_norm": 0.41237401962280273,
      "learning_rate": 0.0004741368109217071,
      "loss": 0.4147,
      "step": 23
    },
    {
      "epoch": 0.026115342763873776,
      "grad_norm": 0.5671696066856384,
      "learning_rate": 0.00047008911571827283,
      "loss": 0.48,
      "step": 24
    },
    {
      "epoch": 0.02720348204570185,
      "grad_norm": 0.42801016569137573,
      "learning_rate": 0.00046576721688302105,
      "loss": 0.3996,
      "step": 25
    },
    {
      "epoch": 0.028291621327529923,
      "grad_norm": 0.3613467514514923,
      "learning_rate": 0.0004611764989637205,
      "loss": 0.3176,
      "step": 26
    },
    {
      "epoch": 0.029379760609358,
      "grad_norm": 1.3145064115524292,
      "learning_rate": 0.0004563226814232444,
      "loss": 0.4326,
      "step": 27
    },
    {
      "epoch": 0.030467899891186073,
      "grad_norm": 0.47409093379974365,
      "learning_rate": 0.0004512118115138315,
      "loss": 0.3246,
      "step": 28
    },
    {
      "epoch": 0.031556039173014146,
      "grad_norm": 0.5220752954483032,
      "learning_rate": 0.0004458502567429631,
      "loss": 0.5013,
      "step": 29
    },
    {
      "epoch": 0.03264417845484222,
      "grad_norm": 1.3407182693481445,
      "learning_rate": 0.00044024469694024196,
      "loss": 0.4893,
      "step": 30
    },
    {
      "epoch": 0.03373231773667029,
      "grad_norm": 0.7757295370101929,
      "learning_rate": 0.00043440211593515554,
      "loss": 0.4163,
      "step": 31
    },
    {
      "epoch": 0.03482045701849837,
      "grad_norm": 0.27271905541419983,
      "learning_rate": 0.0004283297928560951,
      "loss": 0.2256,
      "step": 32
    },
    {
      "epoch": 0.035908596300326445,
      "grad_norm": 0.4762435257434845,
      "learning_rate": 0.0004220352930614672,
      "loss": 0.4584,
      "step": 33
    },
    {
      "epoch": 0.036996735582154515,
      "grad_norm": 0.5020000338554382,
      "learning_rate": 0.00041552645871420013,
      "loss": 0.4403,
      "step": 34
    },
    {
      "epoch": 0.03808487486398259,
      "grad_norm": 0.3345811069011688,
      "learning_rate": 0.00040881139901138467,
      "loss": 0.4192,
      "step": 35
    },
    {
      "epoch": 0.03917301414581066,
      "grad_norm": 0.2985716462135315,
      "learning_rate": 0.00040189848008122475,
      "loss": 0.2805,
      "step": 36
    },
    {
      "epoch": 0.04026115342763874,
      "grad_norm": 0.8610369563102722,
      "learning_rate": 0.00039479631455988334,
      "loss": 0.718,
      "step": 37
    },
    {
      "epoch": 0.041349292709466814,
      "grad_norm": 0.4035598635673523,
      "learning_rate": 0.0003875137508612103,
      "loss": 0.3767,
      "step": 38
    },
    {
      "epoch": 0.042437431991294884,
      "grad_norm": 0.3371526002883911,
      "learning_rate": 0.00038005986215272055,
      "loss": 0.3665,
      "step": 39
    },
    {
      "epoch": 0.04352557127312296,
      "grad_norm": 0.9126002788543701,
      "learning_rate": 0.0003724439350515571,
      "loss": 0.6029,
      "step": 40
    },
    {
      "epoch": 0.04461371055495103,
      "grad_norm": 0.34127795696258545,
      "learning_rate": 0.0003646754580545226,
      "loss": 0.4245,
      "step": 41
    },
    {
      "epoch": 0.04570184983677911,
      "grad_norm": 0.4555828869342804,
      "learning_rate": 0.000356764109716594,
      "loss": 0.6118,
      "step": 42
    },
    {
      "epoch": 0.046789989118607184,
      "grad_norm": 0.30817294120788574,
      "learning_rate": 0.00034871974659264783,
      "loss": 0.3774,
      "step": 43
    },
    {
      "epoch": 0.04787812840043525,
      "grad_norm": 0.6372089982032776,
      "learning_rate": 0.0003405523909574206,
      "loss": 0.4466,
      "step": 44
    },
    {
      "epoch": 0.04896626768226333,
      "grad_norm": 0.2218654602766037,
      "learning_rate": 0.0003322722183190025,
      "loss": 0.2911,
      "step": 45
    },
    {
      "epoch": 0.05005440696409141,
      "grad_norm": 0.7268356084823608,
      "learning_rate": 0.0003238895447414211,
      "loss": 0.5186,
      "step": 46
    },
    {
      "epoch": 0.051142546245919476,
      "grad_norm": 0.3474952280521393,
      "learning_rate": 0.0003154148139921102,
      "loss": 0.3702,
      "step": 47
    },
    {
      "epoch": 0.05223068552774755,
      "grad_norm": 0.47476518154144287,
      "learning_rate": 0.00030685858453027663,
      "loss": 0.3098,
      "step": 48
    },
    {
      "epoch": 0.05331882480957562,
      "grad_norm": 0.33052054047584534,
      "learning_rate": 0.0002982315163523742,
      "loss": 0.3452,
      "step": 49
    },
    {
      "epoch": 0.0544069640914037,
      "grad_norm": 0.3936608135700226,
      "learning_rate": 0.000289544357711076,
      "loss": 0.3504,
      "step": 50
    },
    {
      "epoch": 0.0544069640914037,
      "eval_loss": 0.10187384486198425,
      "eval_runtime": 24.5923,
      "eval_samples_per_second": 15.737,
      "eval_steps_per_second": 7.889,
      "step": 50
    },
    {
      "epoch": 0.055495103373231776,
      "grad_norm": 0.3658672273159027,
      "learning_rate": 0.0002808079317242896,
      "loss": 0.3851,
      "step": 51
    },
    {
      "epoch": 0.056583242655059846,
      "grad_norm": 0.2899942994117737,
      "learning_rate": 0.0002720331228909005,
      "loss": 0.2768,
      "step": 52
    },
    {
      "epoch": 0.05767138193688792,
      "grad_norm": 0.6674902439117432,
      "learning_rate": 0.00026323086353004075,
      "loss": 0.6506,
      "step": 53
    },
    {
      "epoch": 0.058759521218716,
      "grad_norm": 0.8436214923858643,
      "learning_rate": 0.0002544121201607822,
      "loss": 0.8227,
      "step": 54
    },
    {
      "epoch": 0.05984766050054407,
      "grad_norm": 0.2817295789718628,
      "learning_rate": 0.00024558787983921783,
      "loss": 0.3449,
      "step": 55
    },
    {
      "epoch": 0.060935799782372145,
      "grad_norm": 0.25585779547691345,
      "learning_rate": 0.0002367691364699592,
      "loss": 0.2408,
      "step": 56
    },
    {
      "epoch": 0.062023939064200215,
      "grad_norm": 0.250396192073822,
      "learning_rate": 0.00022796687710909964,
      "loss": 0.2588,
      "step": 57
    },
    {
      "epoch": 0.06311207834602829,
      "grad_norm": 0.6818592548370361,
      "learning_rate": 0.00021919206827571036,
      "loss": 0.5172,
      "step": 58
    },
    {
      "epoch": 0.06420021762785637,
      "grad_norm": 0.8470868468284607,
      "learning_rate": 0.00021045564228892402,
      "loss": 0.2823,
      "step": 59
    },
    {
      "epoch": 0.06528835690968444,
      "grad_norm": 0.48449358344078064,
      "learning_rate": 0.00020176848364762578,
      "loss": 0.3356,
      "step": 60
    },
    {
      "epoch": 0.06637649619151251,
      "grad_norm": 0.25606584548950195,
      "learning_rate": 0.00019314141546972343,
      "loss": 0.3552,
      "step": 61
    },
    {
      "epoch": 0.06746463547334058,
      "grad_norm": 0.21550142765045166,
      "learning_rate": 0.00018458518600788986,
      "loss": 0.2025,
      "step": 62
    },
    {
      "epoch": 0.06855277475516866,
      "grad_norm": 0.33708736300468445,
      "learning_rate": 0.00017611045525857898,
      "loss": 0.299,
      "step": 63
    },
    {
      "epoch": 0.06964091403699674,
      "grad_norm": 0.41520461440086365,
      "learning_rate": 0.0001677277816809975,
      "loss": 0.3242,
      "step": 64
    },
    {
      "epoch": 0.07072905331882481,
      "grad_norm": 0.9485870003700256,
      "learning_rate": 0.00015944760904257942,
      "loss": 0.4488,
      "step": 65
    },
    {
      "epoch": 0.07181719260065289,
      "grad_norm": 0.36165550351142883,
      "learning_rate": 0.0001512802534073522,
      "loss": 0.5672,
      "step": 66
    },
    {
      "epoch": 0.07290533188248095,
      "grad_norm": 0.6473806500434875,
      "learning_rate": 0.00014323589028340596,
      "loss": 0.6128,
      "step": 67
    },
    {
      "epoch": 0.07399347116430903,
      "grad_norm": 0.2864266633987427,
      "learning_rate": 0.00013532454194547733,
      "loss": 0.3151,
      "step": 68
    },
    {
      "epoch": 0.0750816104461371,
      "grad_norm": 0.2809907793998718,
      "learning_rate": 0.00012755606494844294,
      "loss": 0.1535,
      "step": 69
    },
    {
      "epoch": 0.07616974972796518,
      "grad_norm": 0.32490411400794983,
      "learning_rate": 0.00011994013784727947,
      "loss": 0.3442,
      "step": 70
    },
    {
      "epoch": 0.07725788900979326,
      "grad_norm": 0.6548874974250793,
      "learning_rate": 0.00011248624913878966,
      "loss": 0.5008,
      "step": 71
    },
    {
      "epoch": 0.07834602829162132,
      "grad_norm": 0.7361955046653748,
      "learning_rate": 0.0001052036854401166,
      "loss": 0.6513,
      "step": 72
    },
    {
      "epoch": 0.0794341675734494,
      "grad_norm": 0.5869486927986145,
      "learning_rate": 9.810151991877531e-05,
      "loss": 0.4508,
      "step": 73
    },
    {
      "epoch": 0.08052230685527748,
      "grad_norm": 0.3208440840244293,
      "learning_rate": 9.118860098861537e-05,
      "loss": 0.325,
      "step": 74
    },
    {
      "epoch": 0.08161044613710555,
      "grad_norm": 0.35303372144699097,
      "learning_rate": 8.44735412857999e-05,
      "loss": 0.327,
      "step": 75
    },
    {
      "epoch": 0.08269858541893363,
      "grad_norm": 0.22797748446464539,
      "learning_rate": 7.79647069385328e-05,
      "loss": 0.3183,
      "step": 76
    },
    {
      "epoch": 0.08378672470076169,
      "grad_norm": 0.328909307718277,
      "learning_rate": 7.167020714390501e-05,
      "loss": 0.3248,
      "step": 77
    },
    {
      "epoch": 0.08487486398258977,
      "grad_norm": 0.34297263622283936,
      "learning_rate": 6.559788406484446e-05,
      "loss": 0.2397,
      "step": 78
    },
    {
      "epoch": 0.08596300326441784,
      "grad_norm": 0.6872113943099976,
      "learning_rate": 5.975530305975807e-05,
      "loss": 0.5042,
      "step": 79
    },
    {
      "epoch": 0.08705114254624592,
      "grad_norm": 0.42344143986701965,
      "learning_rate": 5.414974325703686e-05,
      "loss": 0.3577,
      "step": 80
    },
    {
      "epoch": 0.088139281828074,
      "grad_norm": 0.31334200501441956,
      "learning_rate": 4.8788188486168616e-05,
      "loss": 0.2931,
      "step": 81
    },
    {
      "epoch": 0.08922742110990206,
      "grad_norm": 0.394218385219574,
      "learning_rate": 4.367731857675569e-05,
      "loss": 0.3908,
      "step": 82
    },
    {
      "epoch": 0.09031556039173014,
      "grad_norm": 0.3325517475605011,
      "learning_rate": 3.882350103627952e-05,
      "loss": 0.351,
      "step": 83
    },
    {
      "epoch": 0.09140369967355821,
      "grad_norm": 0.72450190782547,
      "learning_rate": 3.423278311697897e-05,
      "loss": 0.5302,
      "step": 84
    },
    {
      "epoch": 0.09249183895538629,
      "grad_norm": 0.9300814867019653,
      "learning_rate": 2.9910884281727225e-05,
      "loss": 0.6356,
      "step": 85
    },
    {
      "epoch": 0.09357997823721437,
      "grad_norm": 0.36167338490486145,
      "learning_rate": 2.586318907829291e-05,
      "loss": 0.4763,
      "step": 86
    },
    {
      "epoch": 0.09466811751904244,
      "grad_norm": 0.3068345785140991,
      "learning_rate": 2.209474043086457e-05,
      "loss": 0.3455,
      "step": 87
    },
    {
      "epoch": 0.0957562568008705,
      "grad_norm": 0.4473964273929596,
      "learning_rate": 1.861023335719475e-05,
      "loss": 0.5717,
      "step": 88
    },
    {
      "epoch": 0.09684439608269858,
      "grad_norm": 0.31159886717796326,
      "learning_rate": 1.5414009119192633e-05,
      "loss": 0.298,
      "step": 89
    },
    {
      "epoch": 0.09793253536452666,
      "grad_norm": 0.2886298894882202,
      "learning_rate": 1.25100498142523e-05,
      "loss": 0.3209,
      "step": 90
    },
    {
      "epoch": 0.09902067464635474,
      "grad_norm": 0.27010682225227356,
      "learning_rate": 9.901973414055187e-06,
      "loss": 0.2501,
      "step": 91
    },
    {
      "epoch": 0.10010881392818281,
      "grad_norm": 0.3159748315811157,
      "learning_rate": 7.593029257027956e-06,
      "loss": 0.2336,
      "step": 92
    },
    {
      "epoch": 0.10119695321001088,
      "grad_norm": 0.5132943391799927,
      "learning_rate": 5.5860940000714015e-06,
      "loss": 0.4026,
      "step": 93
    },
    {
      "epoch": 0.10228509249183895,
      "grad_norm": 0.3360811769962311,
      "learning_rate": 3.8836680346041594e-06,
      "loss": 0.3444,
      "step": 94
    },
    {
      "epoch": 0.10337323177366703,
      "grad_norm": 0.9271918535232544,
      "learning_rate": 2.487872371386424e-06,
      "loss": 0.7088,
      "step": 95
    },
    {
      "epoch": 0.1044613710554951,
      "grad_norm": 0.7888476252555847,
      "learning_rate": 1.4004459980045125e-06,
      "loss": 0.5126,
      "step": 96
    },
    {
      "epoch": 0.10554951033732318,
      "grad_norm": 0.25628480315208435,
      "learning_rate": 6.22743712309054e-07,
      "loss": 0.2571,
      "step": 97
    },
    {
      "epoch": 0.10663764961915125,
      "grad_norm": 0.4536930322647095,
      "learning_rate": 1.557344345054501e-07,
      "loss": 0.5685,
      "step": 98
    },
    {
      "epoch": 0.10772578890097932,
      "grad_norm": 0.7662109732627869,
      "learning_rate": 0.0,
      "loss": 0.5405,
      "step": 99
    }
  ],
  "logging_steps": 1,
  "max_steps": 99,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.988754859143987e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}