{
  "best_metric": 0.6729200482368469,
  "best_model_checkpoint": "miner_id_24/checkpoint-300",
  "epoch": 0.24737167594310452,
  "eval_steps": 50,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008245722531436817,
      "grad_norm": 0.4181750416755676,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 0.7232,
      "step": 1
    },
    {
      "epoch": 0.0008245722531436817,
      "eval_loss": 0.8246814608573914,
      "eval_runtime": 42.8717,
      "eval_samples_per_second": 9.237,
      "eval_steps_per_second": 9.237,
      "step": 1
    },
    {
      "epoch": 0.0016491445062873633,
      "grad_norm": 0.40615200996398926,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 0.8142,
      "step": 2
    },
    {
      "epoch": 0.0024737167594310453,
      "grad_norm": 0.37523695826530457,
      "learning_rate": 8.999999999999999e-05,
      "loss": 0.7982,
      "step": 3
    },
    {
      "epoch": 0.0032982890125747267,
      "grad_norm": 0.36036789417266846,
      "learning_rate": 0.00011999999999999999,
      "loss": 0.836,
      "step": 4
    },
    {
      "epoch": 0.0041228612657184084,
      "grad_norm": 0.394962877035141,
      "learning_rate": 0.00015,
      "loss": 0.8198,
      "step": 5
    },
    {
      "epoch": 0.004947433518862091,
      "grad_norm": 0.416413277387619,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.7857,
      "step": 6
    },
    {
      "epoch": 0.005772005772005772,
      "grad_norm": 0.5225532650947571,
      "learning_rate": 0.00020999999999999998,
      "loss": 0.7836,
      "step": 7
    },
    {
      "epoch": 0.006596578025149453,
      "grad_norm": 0.39612719416618347,
      "learning_rate": 0.00023999999999999998,
      "loss": 0.7467,
      "step": 8
    },
    {
      "epoch": 0.0074211502782931356,
      "grad_norm": 0.3856079578399658,
      "learning_rate": 0.00027,
      "loss": 0.7699,
      "step": 9
    },
    {
      "epoch": 0.008245722531436817,
      "grad_norm": 0.3804507851600647,
      "learning_rate": 0.0003,
      "loss": 0.7913,
      "step": 10
    },
    {
      "epoch": 0.009070294784580499,
      "grad_norm": 0.3713577389717102,
      "learning_rate": 0.0002999911984174669,
      "loss": 0.7259,
      "step": 11
    },
    {
      "epoch": 0.009894867037724181,
      "grad_norm": 0.3443286418914795,
      "learning_rate": 0.0002999647947027726,
      "loss": 0.7223,
      "step": 12
    },
    {
      "epoch": 0.010719439290867862,
      "grad_norm": 0.32728496193885803,
      "learning_rate": 0.0002999207919545099,
      "loss": 0.7663,
      "step": 13
    },
    {
      "epoch": 0.011544011544011544,
      "grad_norm": 0.3246591091156006,
      "learning_rate": 0.0002998591953365965,
      "loss": 0.749,
      "step": 14
    },
    {
      "epoch": 0.012368583797155226,
      "grad_norm": 0.32717230916023254,
      "learning_rate": 0.00029978001207766854,
      "loss": 0.7442,
      "step": 15
    },
    {
      "epoch": 0.013193156050298907,
      "grad_norm": 0.3290279805660248,
      "learning_rate": 0.00029968325147023263,
      "loss": 0.7572,
      "step": 16
    },
    {
      "epoch": 0.014017728303442589,
      "grad_norm": 0.35251685976982117,
      "learning_rate": 0.000299568924869575,
      "loss": 0.7737,
      "step": 17
    },
    {
      "epoch": 0.014842300556586271,
      "grad_norm": 0.3338168263435364,
      "learning_rate": 0.00029943704569242917,
      "loss": 0.7588,
      "step": 18
    },
    {
      "epoch": 0.01566687280972995,
      "grad_norm": 0.32795682549476624,
      "learning_rate": 0.0002992876294154013,
      "loss": 0.7875,
      "step": 19
    },
    {
      "epoch": 0.016491445062873634,
      "grad_norm": 0.3186565339565277,
      "learning_rate": 0.00029912069357315393,
      "loss": 0.7336,
      "step": 20
    },
    {
      "epoch": 0.017316017316017316,
      "grad_norm": 0.3066052794456482,
      "learning_rate": 0.00029893625775634835,
      "loss": 0.7219,
      "step": 21
    },
    {
      "epoch": 0.018140589569160998,
      "grad_norm": 0.31656959652900696,
      "learning_rate": 0.0002987343436093454,
      "loss": 0.7583,
      "step": 22
    },
    {
      "epoch": 0.01896516182230468,
      "grad_norm": 0.3124566674232483,
      "learning_rate": 0.00029851497482766547,
      "loss": 0.7673,
      "step": 23
    },
    {
      "epoch": 0.019789734075448363,
      "grad_norm": 0.30470794439315796,
      "learning_rate": 0.00029827817715520773,
      "loss": 0.7122,
      "step": 24
    },
    {
      "epoch": 0.02061430632859204,
      "grad_norm": 0.3141001760959625,
      "learning_rate": 0.0002980239783812289,
      "loss": 0.8052,
      "step": 25
    },
    {
      "epoch": 0.021438878581735724,
      "grad_norm": 0.32485461235046387,
      "learning_rate": 0.0002977524083370822,
      "loss": 0.7482,
      "step": 26
    },
    {
      "epoch": 0.022263450834879406,
      "grad_norm": 0.31203949451446533,
      "learning_rate": 0.00029746349889271645,
      "loss": 0.6784,
      "step": 27
    },
    {
      "epoch": 0.023088023088023088,
      "grad_norm": 0.32462912797927856,
      "learning_rate": 0.0002971572839529358,
      "loss": 0.767,
      "step": 28
    },
    {
      "epoch": 0.02391259534116677,
      "grad_norm": 0.3174619972705841,
      "learning_rate": 0.00029683379945342125,
      "loss": 0.7283,
      "step": 29
    },
    {
      "epoch": 0.024737167594310452,
      "grad_norm": 0.33072593808174133,
      "learning_rate": 0.000296493083356513,
      "loss": 0.8054,
      "step": 30
    },
    {
      "epoch": 0.025561739847454135,
      "grad_norm": 0.31485095620155334,
      "learning_rate": 0.00029613517564675565,
      "loss": 0.7499,
      "step": 31
    },
    {
      "epoch": 0.026386312100597813,
      "grad_norm": 0.3060151934623718,
      "learning_rate": 0.0002957601183262058,
      "loss": 0.7342,
      "step": 32
    },
    {
      "epoch": 0.027210884353741496,
      "grad_norm": 0.31382423639297485,
      "learning_rate": 0.000295367955409503,
      "loss": 0.7529,
      "step": 33
    },
    {
      "epoch": 0.028035456606885178,
      "grad_norm": 0.3076959550380707,
      "learning_rate": 0.00029495873291870436,
      "loss": 0.6956,
      "step": 34
    },
    {
      "epoch": 0.02886002886002886,
      "grad_norm": 0.31915611028671265,
      "learning_rate": 0.0002945324988778834,
      "loss": 0.7885,
      "step": 35
    },
    {
      "epoch": 0.029684601113172542,
      "grad_norm": 0.3209708034992218,
      "learning_rate": 0.00029408930330749477,
      "loss": 0.7536,
      "step": 36
    },
    {
      "epoch": 0.030509173366316224,
      "grad_norm": 0.33205971121788025,
      "learning_rate": 0.0002936291982185036,
      "loss": 0.7633,
      "step": 37
    },
    {
      "epoch": 0.0313337456194599,
      "grad_norm": 0.3191162049770355,
      "learning_rate": 0.00029315223760628217,
      "loss": 0.7437,
      "step": 38
    },
    {
      "epoch": 0.032158317872603585,
      "grad_norm": 0.34463781118392944,
      "learning_rate": 0.00029265847744427303,
      "loss": 0.7586,
      "step": 39
    },
    {
      "epoch": 0.03298289012574727,
      "grad_norm": 0.3288670778274536,
      "learning_rate": 0.00029214797567742035,
      "loss": 0.7812,
      "step": 40
    },
    {
      "epoch": 0.03380746237889095,
      "grad_norm": 0.3351598381996155,
      "learning_rate": 0.00029162079221537,
      "loss": 0.785,
      "step": 41
    },
    {
      "epoch": 0.03463203463203463,
      "grad_norm": 0.3358539938926697,
      "learning_rate": 0.0002910769889254386,
      "loss": 0.7145,
      "step": 42
    },
    {
      "epoch": 0.035456606885178314,
      "grad_norm": 0.3250412344932556,
      "learning_rate": 0.0002905166296253533,
      "loss": 0.7333,
      "step": 43
    },
    {
      "epoch": 0.036281179138321996,
      "grad_norm": 0.324008584022522,
      "learning_rate": 0.0002899397800757626,
      "loss": 0.7508,
      "step": 44
    },
    {
      "epoch": 0.03710575139146568,
      "grad_norm": 0.3390953242778778,
      "learning_rate": 0.0002893465079725187,
      "loss": 0.6994,
      "step": 45
    },
    {
      "epoch": 0.03793032364460936,
      "grad_norm": 0.3244946002960205,
      "learning_rate": 0.0002887368829387333,
      "loss": 0.6945,
      "step": 46
    },
    {
      "epoch": 0.03875489589775304,
      "grad_norm": 0.34154826402664185,
      "learning_rate": 0.0002881109765166071,
      "loss": 0.7437,
      "step": 47
    },
    {
      "epoch": 0.039579468150896725,
      "grad_norm": 0.36960309743881226,
      "learning_rate": 0.00028746886215903387,
      "loss": 0.7878,
      "step": 48
    },
    {
      "epoch": 0.04040404040404041,
      "grad_norm": 0.39132601022720337,
      "learning_rate": 0.00028681061522098047,
      "loss": 0.731,
      "step": 49
    },
    {
      "epoch": 0.04122861265718408,
      "grad_norm": 0.68941730260849,
      "learning_rate": 0.0002861363129506435,
      "loss": 0.7756,
      "step": 50
    },
    {
      "epoch": 0.04122861265718408,
      "eval_loss": 0.7312861084938049,
      "eval_runtime": 42.6476,
      "eval_samples_per_second": 9.285,
      "eval_steps_per_second": 9.285,
      "step": 50
    },
    {
      "epoch": 0.042053184910327765,
      "grad_norm": 0.3118657171726227,
      "learning_rate": 0.0002854460344803842,
      "loss": 0.6581,
      "step": 51
    },
    {
      "epoch": 0.04287775716347145,
      "grad_norm": 0.33690398931503296,
      "learning_rate": 0.00028473986081744163,
      "loss": 0.7433,
      "step": 52
    },
    {
      "epoch": 0.04370232941661513,
      "grad_norm": 0.33627963066101074,
      "learning_rate": 0.000284017874834426,
      "loss": 0.73,
      "step": 53
    },
    {
      "epoch": 0.04452690166975881,
      "grad_norm": 0.3204888105392456,
      "learning_rate": 0.0002832801612595937,
      "loss": 0.7398,
      "step": 54
    },
    {
      "epoch": 0.045351473922902494,
      "grad_norm": 0.3057969808578491,
      "learning_rate": 0.0002825268066669034,
      "loss": 0.6966,
      "step": 55
    },
    {
      "epoch": 0.046176046176046176,
      "grad_norm": 0.30388593673706055,
      "learning_rate": 0.00028175789946585693,
      "loss": 0.672,
      "step": 56
    },
    {
      "epoch": 0.04700061842918986,
      "grad_norm": 0.31205031275749207,
      "learning_rate": 0.0002809735298911234,
      "loss": 0.7422,
      "step": 57
    },
    {
      "epoch": 0.04782519068233354,
      "grad_norm": 0.317719042301178,
      "learning_rate": 0.00028017378999195015,
      "loss": 0.7359,
      "step": 58
    },
    {
      "epoch": 0.04864976293547722,
      "grad_norm": 0.3247784674167633,
      "learning_rate": 0.0002793587736213603,
      "loss": 0.688,
      "step": 59
    },
    {
      "epoch": 0.049474335188620905,
      "grad_norm": 0.32107478380203247,
      "learning_rate": 0.00027852857642513836,
      "loss": 0.7123,
      "step": 60
    },
    {
      "epoch": 0.05029890744176459,
      "grad_norm": 0.3145858645439148,
      "learning_rate": 0.00027768329583060635,
      "loss": 0.7274,
      "step": 61
    },
    {
      "epoch": 0.05112347969490827,
      "grad_norm": 0.3061201870441437,
      "learning_rate": 0.00027682303103518976,
      "loss": 0.7098,
      "step": 62
    },
    {
      "epoch": 0.05194805194805195,
      "grad_norm": 0.31068190932273865,
      "learning_rate": 0.00027594788299477655,
      "loss": 0.7176,
      "step": 63
    },
    {
      "epoch": 0.05277262420119563,
      "grad_norm": 0.32404160499572754,
      "learning_rate": 0.0002750579544118695,
      "loss": 0.7304,
      "step": 64
    },
    {
      "epoch": 0.05359719645433931,
      "grad_norm": 0.3103507459163666,
      "learning_rate": 0.00027415334972353357,
      "loss": 0.7112,
      "step": 65
    },
    {
      "epoch": 0.05442176870748299,
      "grad_norm": 0.31706494092941284,
      "learning_rate": 0.0002732341750891397,
      "loss": 0.6998,
      "step": 66
    },
    {
      "epoch": 0.05524634096062667,
      "grad_norm": 0.30976295471191406,
      "learning_rate": 0.00027230053837790666,
      "loss": 0.671,
      "step": 67
    },
    {
      "epoch": 0.056070913213770356,
      "grad_norm": 0.3198986351490021,
      "learning_rate": 0.0002713525491562421,
      "loss": 0.7438,
      "step": 68
    },
    {
      "epoch": 0.05689548546691404,
      "grad_norm": 0.3304222822189331,
      "learning_rate": 0.0002703903186748843,
      "loss": 0.814,
      "step": 69
    },
    {
      "epoch": 0.05772005772005772,
      "grad_norm": 0.3097413182258606,
      "learning_rate": 0.00026941395985584653,
      "loss": 0.6695,
      "step": 70
    },
    {
      "epoch": 0.0585446299732014,
      "grad_norm": 0.31618732213974,
      "learning_rate": 0.00026842358727916524,
      "loss": 0.739,
      "step": 71
    },
    {
      "epoch": 0.059369202226345084,
      "grad_norm": 0.3159608542919159,
      "learning_rate": 0.0002674193171694533,
      "loss": 0.7172,
      "step": 72
    },
    {
      "epoch": 0.06019377447948877,
      "grad_norm": 0.30797278881073,
      "learning_rate": 0.0002664012673822609,
      "loss": 0.7187,
      "step": 73
    },
    {
      "epoch": 0.06101834673263245,
      "grad_norm": 0.3251792788505554,
      "learning_rate": 0.0002653695573902443,
      "loss": 0.796,
      "step": 74
    },
    {
      "epoch": 0.06184291898577613,
      "grad_norm": 0.3200572431087494,
      "learning_rate": 0.0002643243082691454,
      "loss": 0.7268,
      "step": 75
    },
    {
      "epoch": 0.0626674912389198,
      "grad_norm": 0.29941457509994507,
      "learning_rate": 0.0002632656426835831,
      "loss": 0.6928,
      "step": 76
    },
    {
      "epoch": 0.06349206349206349,
      "grad_norm": 0.3021852672100067,
      "learning_rate": 0.00026219368487265753,
      "loss": 0.7257,
      "step": 77
    },
    {
      "epoch": 0.06431663574520717,
      "grad_norm": 0.30670538544654846,
      "learning_rate": 0.00026110856063537083,
      "loss": 0.699,
      "step": 78
    },
    {
      "epoch": 0.06514120799835085,
      "grad_norm": 0.3165689706802368,
      "learning_rate": 0.00026001039731586334,
      "loss": 0.7149,
      "step": 79
    },
    {
      "epoch": 0.06596578025149454,
      "grad_norm": 0.3253694474697113,
      "learning_rate": 0.0002588993237884696,
      "loss": 0.8077,
      "step": 80
    },
    {
      "epoch": 0.06679035250463822,
      "grad_norm": 0.31187790632247925,
      "learning_rate": 0.00025777547044259435,
      "loss": 0.7574,
      "step": 81
    },
    {
      "epoch": 0.0676149247577819,
      "grad_norm": 0.32256990671157837,
      "learning_rate": 0.0002566389691674106,
      "loss": 0.7692,
      "step": 82
    },
    {
      "epoch": 0.06843949701092558,
      "grad_norm": 0.3136816918849945,
      "learning_rate": 0.00025548995333638197,
      "loss": 0.7452,
      "step": 83
    },
    {
      "epoch": 0.06926406926406926,
      "grad_norm": 0.3113918602466583,
      "learning_rate": 0.00025432855779161076,
      "loss": 0.711,
      "step": 84
    },
    {
      "epoch": 0.07008864151721295,
      "grad_norm": 0.30899491906166077,
      "learning_rate": 0.00025315491882801347,
      "loss": 0.7414,
      "step": 85
    },
    {
      "epoch": 0.07091321377035663,
      "grad_norm": 0.31038275361061096,
      "learning_rate": 0.00025196917417732615,
      "loss": 0.69,
      "step": 86
    },
    {
      "epoch": 0.07173778602350031,
      "grad_norm": 0.32005515694618225,
      "learning_rate": 0.0002507714629919409,
      "loss": 0.7283,
      "step": 87
    },
    {
      "epoch": 0.07256235827664399,
      "grad_norm": 0.31575390696525574,
      "learning_rate": 0.0002495619258285757,
      "loss": 0.6974,
      "step": 88
    },
    {
      "epoch": 0.07338693052978768,
      "grad_norm": 0.3133029341697693,
      "learning_rate": 0.0002483407046317794,
      "loss": 0.7083,
      "step": 89
    },
    {
      "epoch": 0.07421150278293136,
      "grad_norm": 0.32530999183654785,
      "learning_rate": 0.00024710794271727413,
      "loss": 0.7165,
      "step": 90
    },
    {
      "epoch": 0.07503607503607504,
      "grad_norm": 0.31330084800720215,
      "learning_rate": 0.0002458637847551364,
      "loss": 0.6542,
      "step": 91
    },
    {
      "epoch": 0.07586064728921872,
      "grad_norm": 0.3188161849975586,
      "learning_rate": 0.00024460837675281926,
      "loss": 0.6734,
      "step": 92
    },
    {
      "epoch": 0.0766852195423624,
      "grad_norm": 0.3343876600265503,
      "learning_rate": 0.00024334186603801807,
      "loss": 0.7359,
      "step": 93
    },
    {
      "epoch": 0.07750979179550609,
      "grad_norm": 0.33376866579055786,
      "learning_rate": 0.00024206440124138062,
      "loss": 0.6967,
      "step": 94
    },
    {
      "epoch": 0.07833436404864977,
      "grad_norm": 0.3637959957122803,
      "learning_rate": 0.0002407761322790648,
      "loss": 0.7499,
      "step": 95
    },
    {
      "epoch": 0.07915893630179345,
      "grad_norm": 0.335549533367157,
      "learning_rate": 0.00023947721033514512,
      "loss": 0.7196,
      "step": 96
    },
    {
      "epoch": 0.07998350855493713,
      "grad_norm": 0.35548627376556396,
      "learning_rate": 0.00023816778784387094,
      "loss": 0.737,
      "step": 97
    },
    {
      "epoch": 0.08080808080808081,
      "grad_norm": 0.3780907094478607,
      "learning_rate": 0.0002368480184717773,
      "loss": 0.7565,
      "step": 98
    },
    {
      "epoch": 0.08163265306122448,
      "grad_norm": 0.40498775243759155,
      "learning_rate": 0.00023551805709965147,
      "loss": 0.6692,
      "step": 99
    },
    {
      "epoch": 0.08245722531436817,
      "grad_norm": 0.8926903605461121,
      "learning_rate": 0.00023417805980435736,
      "loss": 0.4799,
      "step": 100
    },
    {
      "epoch": 0.08245722531436817,
      "eval_loss": 0.7042385935783386,
      "eval_runtime": 42.7803,
      "eval_samples_per_second": 9.257,
      "eval_steps_per_second": 9.257,
      "step": 100
    },
    {
      "epoch": 0.08328179756751185,
      "grad_norm": 0.29144763946533203,
      "learning_rate": 0.00023282818384051866,
      "loss": 0.6653,
      "step": 101
    },
    {
      "epoch": 0.08410636982065553,
      "grad_norm": 0.290532648563385,
      "learning_rate": 0.00023146858762206489,
      "loss": 0.6903,
      "step": 102
    },
    {
      "epoch": 0.08493094207379921,
      "grad_norm": 0.304470956325531,
      "learning_rate": 0.00023009943070364044,
      "loss": 0.6943,
      "step": 103
    },
    {
      "epoch": 0.0857555143269429,
      "grad_norm": 0.31192028522491455,
      "learning_rate": 0.0002287208737618801,
      "loss": 0.7152,
      "step": 104
    },
    {
      "epoch": 0.08658008658008658,
      "grad_norm": 0.30607473850250244,
      "learning_rate": 0.00022733307857655325,
      "loss": 0.7248,
      "step": 105
    },
    {
      "epoch": 0.08740465883323026,
      "grad_norm": 0.2998904883861542,
      "learning_rate": 0.00022593620801157808,
      "loss": 0.7291,
      "step": 106
    },
    {
      "epoch": 0.08822923108637394,
      "grad_norm": 0.3096725344657898,
      "learning_rate": 0.00022453042599590882,
      "loss": 0.7241,
      "step": 107
    },
    {
      "epoch": 0.08905380333951762,
      "grad_norm": 0.3047013282775879,
      "learning_rate": 0.00022311589750429787,
      "loss": 0.7206,
      "step": 108
    },
    {
      "epoch": 0.0898783755926613,
      "grad_norm": 0.3195563554763794,
      "learning_rate": 0.00022169278853793545,
      "loss": 0.7217,
      "step": 109
    },
    {
      "epoch": 0.09070294784580499,
      "grad_norm": 0.32428956031799316,
      "learning_rate": 0.00022026126610496852,
      "loss": 0.6962,
      "step": 110
    },
    {
      "epoch": 0.09152752009894867,
      "grad_norm": 0.31494930386543274,
      "learning_rate": 0.0002188214982009016,
      "loss": 0.6812,
      "step": 111
    },
    {
      "epoch": 0.09235209235209235,
      "grad_norm": 0.329039603471756,
      "learning_rate": 0.00021737365378888187,
      "loss": 0.7871,
      "step": 112
    },
    {
      "epoch": 0.09317666460523603,
      "grad_norm": 0.3266455829143524,
      "learning_rate": 0.00021591790277987043,
      "loss": 0.7367,
      "step": 113
    },
    {
      "epoch": 0.09400123685837972,
      "grad_norm": 0.31767553091049194,
      "learning_rate": 0.00021445441601270276,
      "loss": 0.7702,
      "step": 114
    },
    {
      "epoch": 0.0948258091115234,
      "grad_norm": 0.32063761353492737,
      "learning_rate": 0.00021298336523403968,
      "loss": 0.7514,
      "step": 115
    },
    {
      "epoch": 0.09565038136466708,
      "grad_norm": 0.297237366437912,
      "learning_rate": 0.0002115049230782124,
      "loss": 0.6745,
      "step": 116
    },
    {
      "epoch": 0.09647495361781076,
      "grad_norm": 0.30952492356300354,
      "learning_rate": 0.00021001926304696296,
      "loss": 0.6953,
      "step": 117
    },
    {
      "epoch": 0.09729952587095445,
      "grad_norm": 0.3084593415260315,
      "learning_rate": 0.00020852655948908316,
      "loss": 0.7184,
      "step": 118
    },
    {
      "epoch": 0.09812409812409813,
      "grad_norm": 0.30032604932785034,
      "learning_rate": 0.0002070269875799538,
      "loss": 0.6477,
      "step": 119
    },
    {
      "epoch": 0.09894867037724181,
      "grad_norm": 0.3085014224052429,
      "learning_rate": 0.00020552072330098716,
      "loss": 0.7137,
      "step": 120
    },
    {
      "epoch": 0.09977324263038549,
      "grad_norm": 0.3050251007080078,
      "learning_rate": 0.0002040079434189748,
      "loss": 0.6947,
      "step": 121
    },
    {
      "epoch": 0.10059781488352917,
      "grad_norm": 0.29092761874198914,
      "learning_rate": 0.00020248882546534326,
      "loss": 0.6166,
      "step": 122
    },
    {
      "epoch": 0.10142238713667286,
      "grad_norm": 0.32282838225364685,
      "learning_rate": 0.00020096354771531976,
      "loss": 0.7807,
      "step": 123
    },
    {
      "epoch": 0.10224695938981654,
      "grad_norm": 0.3140111565589905,
      "learning_rate": 0.00019943228916701104,
      "loss": 0.7271,
      "step": 124
    },
    {
      "epoch": 0.10307153164296022,
      "grad_norm": 0.3199913203716278,
      "learning_rate": 0.00019789522952039695,
      "loss": 0.6998,
      "step": 125
    },
    {
      "epoch": 0.1038961038961039,
      "grad_norm": 0.3230836093425751,
      "learning_rate": 0.0001963525491562421,
      "loss": 0.7354,
      "step": 126
    },
    {
      "epoch": 0.10472067614924757,
      "grad_norm": 0.32371896505355835,
      "learning_rate": 0.00019480442911492702,
      "loss": 0.7583,
      "step": 127
    },
    {
      "epoch": 0.10554524840239125,
      "grad_norm": 0.3246193826198578,
      "learning_rate": 0.00019325105107520263,
      "loss": 0.7296,
      "step": 128
    },
    {
      "epoch": 0.10636982065553494,
      "grad_norm": 0.3160342872142792,
      "learning_rate": 0.00019169259733286913,
      "loss": 0.7091,
      "step": 129
    },
    {
      "epoch": 0.10719439290867862,
      "grad_norm": 0.3068806231021881,
      "learning_rate": 0.00019012925077938314,
      "loss": 0.7068,
      "step": 130
    },
    {
      "epoch": 0.1080189651618223,
      "grad_norm": 0.31874704360961914,
      "learning_rate": 0.0001885611948803941,
      "loss": 0.7188,
      "step": 131
    },
    {
      "epoch": 0.10884353741496598,
      "grad_norm": 0.30816522240638733,
      "learning_rate": 0.0001869886136542143,
      "loss": 0.7145,
      "step": 132
    },
    {
      "epoch": 0.10966810966810966,
      "grad_norm": 0.3057398498058319,
      "learning_rate": 0.00018541169165022298,
      "loss": 0.6788,
      "step": 133
    },
    {
      "epoch": 0.11049268192125335,
      "grad_norm": 0.30414751172065735,
      "learning_rate": 0.00018383061392720913,
      "loss": 0.6881,
      "step": 134
    },
    {
      "epoch": 0.11131725417439703,
      "grad_norm": 0.31472885608673096,
      "learning_rate": 0.0001822455660316536,
      "loss": 0.7089,
      "step": 135
    },
    {
      "epoch": 0.11214182642754071,
      "grad_norm": 0.309356153011322,
      "learning_rate": 0.00018065673397595473,
      "loss": 0.6732,
      "step": 136
    },
    {
      "epoch": 0.1129663986806844,
      "grad_norm": 0.30579566955566406,
      "learning_rate": 0.00017906430421659876,
      "loss": 0.6453,
      "step": 137
    },
    {
      "epoch": 0.11379097093382808,
      "grad_norm": 0.3306417465209961,
      "learning_rate": 0.00017746846363227842,
      "loss": 0.7327,
      "step": 138
    },
    {
      "epoch": 0.11461554318697176,
      "grad_norm": 0.32207512855529785,
      "learning_rate": 0.00017586939950196186,
      "loss": 0.677,
      "step": 139
    },
    {
      "epoch": 0.11544011544011544,
      "grad_norm": 0.3170906603336334,
      "learning_rate": 0.00017426729948291474,
      "loss": 0.6764,
      "step": 140
    },
    {
      "epoch": 0.11626468769325912,
      "grad_norm": 0.32157909870147705,
      "learning_rate": 0.00017266235158867752,
      "loss": 0.7016,
      "step": 141
    },
    {
      "epoch": 0.1170892599464028,
      "grad_norm": 0.3478415012359619,
      "learning_rate": 0.00017105474416700164,
      "loss": 0.6956,
      "step": 142
    },
    {
      "epoch": 0.11791383219954649,
      "grad_norm": 0.32598310708999634,
      "learning_rate": 0.0001694446658777458,
      "loss": 0.6716,
      "step": 143
    },
    {
      "epoch": 0.11873840445269017,
      "grad_norm": 0.3289110064506531,
      "learning_rate": 0.00016783230567073596,
      "loss": 0.7107,
      "step": 144
    },
    {
      "epoch": 0.11956297670583385,
      "grad_norm": 0.34824198484420776,
      "learning_rate": 0.00016621785276359127,
      "loss": 0.6887,
      "step": 145
    },
    {
      "epoch": 0.12038754895897753,
      "grad_norm": 0.35514163970947266,
      "learning_rate": 0.0001646014966195185,
      "loss": 0.7147,
      "step": 146
    },
    {
      "epoch": 0.12121212121212122,
      "grad_norm": 0.3446742296218872,
      "learning_rate": 0.00016298342692507763,
      "loss": 0.6324,
      "step": 147
    },
    {
      "epoch": 0.1220366934652649,
      "grad_norm": 0.3650054633617401,
      "learning_rate": 0.00016136383356792156,
      "loss": 0.6782,
      "step": 148
    },
    {
      "epoch": 0.12286126571840858,
      "grad_norm": 0.3771435022354126,
      "learning_rate": 0.0001597429066145116,
      "loss": 0.6823,
      "step": 149
    },
    {
      "epoch": 0.12368583797155226,
      "grad_norm": 0.6211467981338501,
      "learning_rate": 0.0001581208362878126,
      "loss": 0.6651,
      "step": 150
    },
    {
      "epoch": 0.12368583797155226,
      "eval_loss": 0.6936895847320557,
      "eval_runtime": 42.757,
      "eval_samples_per_second": 9.262,
      "eval_steps_per_second": 9.262,
      "step": 150
    },
    {
      "epoch": 0.12451041022469594,
      "grad_norm": 0.3050060570240021,
      "learning_rate": 0.00015649781294496933,
      "loss": 0.5971,
      "step": 151
    },
    {
      "epoch": 0.1253349824778396,
      "grad_norm": 0.31458163261413574,
      "learning_rate": 0.00015487402705496707,
      "loss": 0.7042,
      "step": 152
    },
    {
      "epoch": 0.1261595547309833,
      "grad_norm": 0.30443075299263,
      "learning_rate": 0.0001532496691762796,
      "loss": 0.6492,
      "step": 153
    },
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 0.31165623664855957,
      "learning_rate": 0.00015162492993450597,
      "loss": 0.7077,
      "step": 154
    },
    {
      "epoch": 0.12780869923727067,
      "grad_norm": 0.3184472322463989,
      "learning_rate": 0.00015,
      "loss": 0.7271,
      "step": 155
    },
    {
      "epoch": 0.12863327149041434,
      "grad_norm": 0.31272202730178833,
      "learning_rate": 0.00014837507006549403,
      "loss": 0.7611,
      "step": 156
    },
    {
      "epoch": 0.12945784374355804,
      "grad_norm": 0.3030358850955963,
      "learning_rate": 0.00014675033082372038,
      "loss": 0.7276,
      "step": 157
    },
    {
      "epoch": 0.1302824159967017,
      "grad_norm": 0.31098514795303345,
      "learning_rate": 0.00014512597294503293,
      "loss": 0.6906,
      "step": 158
    },
    {
      "epoch": 0.1311069882498454,
      "grad_norm": 0.3137834370136261,
      "learning_rate": 0.00014350218705503067,
      "loss": 0.7106,
      "step": 159
    },
    {
      "epoch": 0.13193156050298907,
      "grad_norm": 0.3150627315044403,
      "learning_rate": 0.00014187916371218736,
      "loss": 0.7156,
      "step": 160
    },
    {
      "epoch": 0.13275613275613277,
      "grad_norm": 0.31015968322753906,
      "learning_rate": 0.00014025709338548836,
      "loss": 0.6631,
      "step": 161
    },
    {
      "epoch": 0.13358070500927643,
      "grad_norm": 0.29935508966445923,
      "learning_rate": 0.00013863616643207844,
      "loss": 0.6572,
      "step": 162
    },
    {
      "epoch": 0.13440527726242013,
      "grad_norm": 0.30627861618995667,
      "learning_rate": 0.00013701657307492235,
      "loss": 0.6925,
      "step": 163
    },
    {
      "epoch": 0.1352298495155638,
      "grad_norm": 0.32339316606521606,
      "learning_rate": 0.00013539850338048154,
      "loss": 0.7109,
      "step": 164
    },
    {
      "epoch": 0.1360544217687075,
      "grad_norm": 0.31456539034843445,
      "learning_rate": 0.00013378214723640876,
      "loss": 0.697,
      "step": 165
    },
    {
      "epoch": 0.13687899402185116,
      "grad_norm": 0.3141114413738251,
      "learning_rate": 0.00013216769432926404,
      "loss": 0.6922,
      "step": 166
    },
    {
      "epoch": 0.13770356627499486,
      "grad_norm": 0.32136693596839905,
      "learning_rate": 0.00013055533412225422,
      "loss": 0.7408,
      "step": 167
    },
    {
      "epoch": 0.13852813852813853,
      "grad_norm": 0.31578657031059265,
      "learning_rate": 0.00012894525583299833,
      "loss": 0.7104,
      "step": 168
    },
    {
      "epoch": 0.1393527107812822,
      "grad_norm": 0.3017723262310028,
      "learning_rate": 0.0001273376484113225,
      "loss": 0.672,
      "step": 169
    },
    {
      "epoch": 0.1401772830344259,
      "grad_norm": 0.3208543658256531,
      "learning_rate": 0.0001257327005170853,
      "loss": 0.7146,
      "step": 170
    },
    {
      "epoch": 0.14100185528756956,
      "grad_norm": 0.31153324246406555,
      "learning_rate": 0.00012413060049803814,
      "loss": 0.6276,
      "step": 171
    },
    {
      "epoch": 0.14182642754071326,
      "grad_norm": 0.3135003447532654,
      "learning_rate": 0.00012253153636772156,
      "loss": 0.6705,
      "step": 172
    },
    {
      "epoch": 0.14265099979385693,
      "grad_norm": 0.30405810475349426,
      "learning_rate": 0.00012093569578340124,
      "loss": 0.6602,
      "step": 173
    },
    {
      "epoch": 0.14347557204700062,
      "grad_norm": 0.32480090856552124,
      "learning_rate": 0.00011934326602404528,
      "loss": 0.6755,
      "step": 174
    },
    {
      "epoch": 0.1443001443001443,
      "grad_norm": 0.3095938563346863,
      "learning_rate": 0.00011775443396834638,
      "loss": 0.7024,
      "step": 175
    },
    {
      "epoch": 0.14512471655328799,
      "grad_norm": 0.32258421182632446,
      "learning_rate": 0.00011616938607279086,
      "loss": 0.7044,
      "step": 176
    },
    {
      "epoch": 0.14594928880643165,
      "grad_norm": 0.3185218870639801,
      "learning_rate": 0.00011458830834977698,
      "loss": 0.7224,
      "step": 177
    },
    {
      "epoch": 0.14677386105957535,
      "grad_norm": 0.31658223271369934,
      "learning_rate": 0.0001130113863457857,
      "loss": 0.7346,
      "step": 178
    },
    {
      "epoch": 0.14759843331271902,
      "grad_norm": 0.31286993622779846,
      "learning_rate": 0.00011143880511960584,
      "loss": 0.7063,
      "step": 179
    },
    {
      "epoch": 0.14842300556586271,
      "grad_norm": 0.30426645278930664,
      "learning_rate": 0.00010987074922061689,
      "loss": 0.6781,
      "step": 180
    },
    {
      "epoch": 0.14924757781900638,
      "grad_norm": 0.30204102396965027,
      "learning_rate": 0.00010830740266713087,
      "loss": 0.6914,
      "step": 181
    },
    {
      "epoch": 0.15007215007215008,
      "grad_norm": 0.3117939233779907,
      "learning_rate": 0.00010674894892479738,
      "loss": 0.714,
      "step": 182
    },
    {
      "epoch": 0.15089672232529375,
      "grad_norm": 0.30473530292510986,
      "learning_rate": 0.00010519557088507298,
      "loss": 0.6351,
      "step": 183
    },
    {
      "epoch": 0.15172129457843744,
      "grad_norm": 0.30228060483932495,
      "learning_rate": 0.0001036474508437579,
      "loss": 0.6095,
      "step": 184
    },
    {
      "epoch": 0.1525458668315811,
      "grad_norm": 0.32118985056877136,
      "learning_rate": 0.00010210477047960302,
      "loss": 0.7311,
      "step": 185
    },
    {
      "epoch": 0.1533704390847248,
      "grad_norm": 0.31862205266952515,
      "learning_rate": 0.00010056771083298893,
      "loss": 0.6728,
      "step": 186
    },
    {
      "epoch": 0.15419501133786848,
      "grad_norm": 0.3058205246925354,
      "learning_rate": 9.903645228468024e-05,
      "loss": 0.6495,
      "step": 187
    },
    {
      "epoch": 0.15501958359101217,
      "grad_norm": 0.3158295750617981,
      "learning_rate": 9.751117453465673e-05,
      "loss": 0.688,
      "step": 188
    },
    {
      "epoch": 0.15584415584415584,
      "grad_norm": 0.32295459508895874,
      "learning_rate": 9.59920565810252e-05,
      "loss": 0.7007,
      "step": 189
    },
    {
      "epoch": 0.15666872809729954,
      "grad_norm": 0.3153195083141327,
      "learning_rate": 9.447927669901282e-05,
      "loss": 0.6781,
      "step": 190
    },
    {
      "epoch": 0.1574933003504432,
      "grad_norm": 0.31914830207824707,
      "learning_rate": 9.297301242004618e-05,
      "loss": 0.6459,
      "step": 191
    },
    {
      "epoch": 0.1583178726035869,
      "grad_norm": 0.32512032985687256,
      "learning_rate": 9.14734405109168e-05,
      "loss": 0.708,
      "step": 192
    },
    {
      "epoch": 0.15914244485673057,
      "grad_norm": 0.3304908573627472,
      "learning_rate": 8.998073695303701e-05,
      "loss": 0.6703,
      "step": 193
    },
    {
      "epoch": 0.15996701710987427,
      "grad_norm": 0.33657538890838623,
      "learning_rate": 8.849507692178758e-05,
      "loss": 0.6989,
      "step": 194
    },
    {
      "epoch": 0.16079158936301793,
      "grad_norm": 0.33851417899131775,
      "learning_rate": 8.70166347659603e-05,
      "loss": 0.709,
      "step": 195
    },
    {
      "epoch": 0.16161616161616163,
      "grad_norm": 0.3366221785545349,
      "learning_rate": 8.554558398729725e-05,
      "loss": 0.6213,
      "step": 196
    },
    {
      "epoch": 0.1624407338693053,
      "grad_norm": 0.3471497595310211,
      "learning_rate": 8.408209722012956e-05,
      "loss": 0.7261,
      "step": 197
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.35795000195503235,
      "learning_rate": 8.262634621111818e-05,
      "loss": 0.639,
      "step": 198
    },
    {
      "epoch": 0.16408987837559266,
      "grad_norm": 0.3911006450653076,
      "learning_rate": 8.117850179909842e-05,
      "loss": 0.6943,
      "step": 199
    },
    {
      "epoch": 0.16491445062873633,
      "grad_norm": 0.5652897357940674,
      "learning_rate": 7.973873389503149e-05,
      "loss": 0.6569,
      "step": 200
    },
    {
      "epoch": 0.16491445062873633,
      "eval_loss": 0.6824482679367065,
      "eval_runtime": 42.7512,
      "eval_samples_per_second": 9.263,
      "eval_steps_per_second": 9.263,
      "step": 200
    },
    {
      "epoch": 0.16573902288188003,
      "grad_norm": 0.29706528782844543,
      "learning_rate": 7.830721146206451e-05,
      "loss": 0.5975,
      "step": 201
    },
    {
      "epoch": 0.1665635951350237,
      "grad_norm": 0.3147106170654297,
      "learning_rate": 7.688410249570214e-05,
      "loss": 0.7244,
      "step": 202
    },
    {
      "epoch": 0.1673881673881674,
      "grad_norm": 0.3083764314651489,
      "learning_rate": 7.54695740040912e-05,
      "loss": 0.6659,
      "step": 203
    },
    {
      "epoch": 0.16821273964131106,
      "grad_norm": 0.31014275550842285,
      "learning_rate": 7.406379198842189e-05,
      "loss": 0.6614,
      "step": 204
    },
    {
      "epoch": 0.16903731189445476,
      "grad_norm": 0.3195944130420685,
      "learning_rate": 7.266692142344672e-05,
      "loss": 0.7009,
      "step": 205
    },
    {
      "epoch": 0.16986188414759842,
      "grad_norm": 0.30758917331695557,
      "learning_rate": 7.127912623811993e-05,
      "loss": 0.6654,
      "step": 206
    },
    {
      "epoch": 0.17068645640074212,
      "grad_norm": 0.30025753378868103,
      "learning_rate": 6.990056929635957e-05,
      "loss": 0.6463,
      "step": 207
    },
    {
      "epoch": 0.1715110286538858,
      "grad_norm": 0.3022189736366272,
      "learning_rate": 6.853141237793506e-05,
      "loss": 0.6522,
      "step": 208
    },
    {
      "epoch": 0.17233560090702948,
      "grad_norm": 0.3084527850151062,
      "learning_rate": 6.717181615948126e-05,
      "loss": 0.7102,
      "step": 209
    },
    {
      "epoch": 0.17316017316017315,
      "grad_norm": 0.3123379647731781,
      "learning_rate": 6.582194019564266e-05,
      "loss": 0.6977,
      "step": 210
    },
    {
      "epoch": 0.17398474541331685,
      "grad_norm": 0.3006177842617035,
      "learning_rate": 6.448194290034848e-05,
      "loss": 0.6934,
      "step": 211
    },
    {
      "epoch": 0.17480931766646052,
      "grad_norm": 0.30770057439804077,
      "learning_rate": 6.315198152822272e-05,
      "loss": 0.6854,
      "step": 212
    },
    {
      "epoch": 0.1756338899196042,
      "grad_norm": 0.31282415986061096,
      "learning_rate": 6.183221215612904e-05,
      "loss": 0.7131,
      "step": 213
    },
    {
      "epoch": 0.17645846217274788,
      "grad_norm": 0.3112223148345947,
      "learning_rate": 6.052278966485491e-05,
      "loss": 0.7005,
      "step": 214
    },
    {
      "epoch": 0.17728303442589158,
      "grad_norm": 0.30379047989845276,
      "learning_rate": 5.922386772093526e-05,
      "loss": 0.6716,
      "step": 215
    },
    {
      "epoch": 0.17810760667903525,
      "grad_norm": 0.318124920129776,
      "learning_rate": 5.793559875861938e-05,
      "loss": 0.6676,
      "step": 216
    },
    {
      "epoch": 0.17893217893217894,
      "grad_norm": 0.32162636518478394,
      "learning_rate": 5.6658133961981894e-05,
      "loss": 0.716,
      "step": 217
    },
    {
      "epoch": 0.1797567511853226,
      "grad_norm": 0.2995055615901947,
      "learning_rate": 5.5391623247180744e-05,
      "loss": 0.6375,
      "step": 218
    },
    {
      "epoch": 0.1805813234384663,
      "grad_norm": 0.30936968326568604,
      "learning_rate": 5.413621524486363e-05,
      "loss": 0.6462,
      "step": 219
    },
    {
      "epoch": 0.18140589569160998,
      "grad_norm": 0.31474071741104126,
      "learning_rate": 5.289205728272586e-05,
      "loss": 0.7098,
      "step": 220
    },
    {
      "epoch": 0.18223046794475367,
      "grad_norm": 0.31727197766304016,
      "learning_rate": 5.165929536822059e-05,
      "loss": 0.7516,
      "step": 221
    },
    {
      "epoch": 0.18305504019789734,
      "grad_norm": 0.32454827427864075,
      "learning_rate": 5.043807417142436e-05,
      "loss": 0.7308,
      "step": 222
    },
    {
      "epoch": 0.18387961245104104,
      "grad_norm": 0.3066427409648895,
      "learning_rate": 4.922853700805909e-05,
      "loss": 0.6555,
      "step": 223
    },
    {
      "epoch": 0.1847041847041847,
      "grad_norm": 0.3160225749015808,
      "learning_rate": 4.8030825822673814e-05,
      "loss": 0.6733,
      "step": 224
    },
    {
      "epoch": 0.18552875695732837,
      "grad_norm": 0.30562353134155273,
      "learning_rate": 4.684508117198648e-05,
      "loss": 0.6771,
      "step": 225
    },
    {
      "epoch": 0.18635332921047207,
      "grad_norm": 0.3080686628818512,
      "learning_rate": 4.567144220838923e-05,
      "loss": 0.6759,
      "step": 226
    },
    {
      "epoch": 0.18717790146361574,
      "grad_norm": 0.31016868352890015,
      "learning_rate": 4.4510046663617996e-05,
      "loss": 0.6937,
      "step": 227
    },
    {
      "epoch": 0.18800247371675943,
      "grad_norm": 0.30470389127731323,
      "learning_rate": 4.336103083258942e-05,
      "loss": 0.6954,
      "step": 228
    },
    {
      "epoch": 0.1888270459699031,
      "grad_norm": 0.3189505338668823,
      "learning_rate": 4.2224529557405645e-05,
      "loss": 0.7103,
      "step": 229
    },
    {
      "epoch": 0.1896516182230468,
      "grad_norm": 0.31451600790023804,
      "learning_rate": 4.1100676211530404e-05,
      "loss": 0.7146,
      "step": 230
    },
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 0.3073336184024811,
      "learning_rate": 3.998960268413666e-05,
      "loss": 0.6761,
      "step": 231
    },
    {
      "epoch": 0.19130076272933416,
      "grad_norm": 0.31194430589675903,
      "learning_rate": 3.889143936462914e-05,
      "loss": 0.6936,
      "step": 232
    },
    {
      "epoch": 0.19212533498247783,
      "grad_norm": 0.30823662877082825,
      "learning_rate": 3.780631512734241e-05,
      "loss": 0.6915,
      "step": 233
    },
    {
      "epoch": 0.19294990723562153,
      "grad_norm": 0.31047096848487854,
      "learning_rate": 3.673435731641691e-05,
      "loss": 0.6986,
      "step": 234
    },
    {
      "epoch": 0.1937744794887652,
      "grad_norm": 0.3224363625049591,
      "learning_rate": 3.567569173085454e-05,
      "loss": 0.7141,
      "step": 235
    },
    {
      "epoch": 0.1945990517419089,
      "grad_norm": 0.3098570704460144,
      "learning_rate": 3.463044260975566e-05,
      "loss": 0.6893,
      "step": 236
    },
    {
      "epoch": 0.19542362399505256,
      "grad_norm": 0.30459511280059814,
      "learning_rate": 3.3598732617739036e-05,
      "loss": 0.6608,
      "step": 237
    },
    {
      "epoch": 0.19624819624819625,
      "grad_norm": 0.32793962955474854,
      "learning_rate": 3.258068283054666e-05,
      "loss": 0.7535,
      "step": 238
    },
    {
      "epoch": 0.19707276850133992,
      "grad_norm": 0.31355416774749756,
      "learning_rate": 3.1576412720834746e-05,
      "loss": 0.6545,
      "step": 239
    },
    {
      "epoch": 0.19789734075448362,
      "grad_norm": 0.3191066086292267,
      "learning_rate": 3.058604014415343e-05,
      "loss": 0.6362,
      "step": 240
    },
    {
      "epoch": 0.1987219130076273,
      "grad_norm": 0.3128627836704254,
      "learning_rate": 2.960968132511567e-05,
      "loss": 0.625,
      "step": 241
    },
    {
      "epoch": 0.19954648526077098,
      "grad_norm": 0.33767595887184143,
      "learning_rate": 2.8647450843757897e-05,
      "loss": 0.7637,
      "step": 242
    },
    {
      "epoch": 0.20037105751391465,
      "grad_norm": 0.32248735427856445,
      "learning_rate": 2.7699461622093304e-05,
      "loss": 0.6197,
      "step": 243
    },
    {
      "epoch": 0.20119562976705835,
      "grad_norm": 0.33070605993270874,
      "learning_rate": 2.67658249108603e-05,
      "loss": 0.6475,
      "step": 244
    },
    {
      "epoch": 0.20202020202020202,
      "grad_norm": 0.3260677754878998,
      "learning_rate": 2.584665027646643e-05,
      "loss": 0.6493,
      "step": 245
    },
    {
      "epoch": 0.2028447742733457,
      "grad_norm": 0.35679200291633606,
      "learning_rate": 2.49420455881305e-05,
      "loss": 0.6593,
      "step": 246
    },
    {
      "epoch": 0.20366934652648938,
      "grad_norm": 0.35772812366485596,
      "learning_rate": 2.4052117005223455e-05,
      "loss": 0.7124,
      "step": 247
    },
    {
      "epoch": 0.20449391877963308,
      "grad_norm": 0.3646053671836853,
      "learning_rate": 2.317696896481024e-05,
      "loss": 0.6362,
      "step": 248
    },
    {
      "epoch": 0.20531849103277675,
      "grad_norm": 0.4201851487159729,
      "learning_rate": 2.231670416939364e-05,
      "loss": 0.7586,
      "step": 249
    },
    {
      "epoch": 0.20614306328592044,
      "grad_norm": 0.7209053635597229,
      "learning_rate": 2.147142357486164e-05,
      "loss": 0.7507,
      "step": 250
    },
    {
      "epoch": 0.20614306328592044,
      "eval_loss": 0.6745719313621521,
      "eval_runtime": 42.7078,
      "eval_samples_per_second": 9.272,
      "eval_steps_per_second": 9.272,
      "step": 250
    },
    {
      "epoch": 0.2069676355390641,
      "grad_norm": 0.2964789569377899,
      "learning_rate": 2.0641226378639715e-05,
      "loss": 0.6769,
      "step": 251
    },
    {
      "epoch": 0.2077922077922078,
      "grad_norm": 0.2993400990962982,
      "learning_rate": 1.9826210008049785e-05,
      "loss": 0.6628,
      "step": 252
    },
    {
      "epoch": 0.20861678004535147,
      "grad_norm": 0.3071148991584778,
      "learning_rate": 1.902647010887655e-05,
      "loss": 0.6987,
      "step": 253
    },
    {
      "epoch": 0.20944135229849514,
      "grad_norm": 0.3106604814529419,
      "learning_rate": 1.8242100534143062e-05,
      "loss": 0.6984,
      "step": 254
    },
    {
      "epoch": 0.21026592455163884,
      "grad_norm": 0.3057470917701721,
      "learning_rate": 1.7473193333096575e-05,
      "loss": 0.6721,
      "step": 255
    },
    {
      "epoch": 0.2110904968047825,
      "grad_norm": 0.319230854511261,
      "learning_rate": 1.671983874040631e-05,
      "loss": 0.6974,
      "step": 256
    },
    {
      "epoch": 0.2119150690579262,
      "grad_norm": 0.3045044243335724,
      "learning_rate": 1.598212516557394e-05,
      "loss": 0.6795,
      "step": 257
    },
    {
      "epoch": 0.21273964131106987,
      "grad_norm": 0.3158814013004303,
      "learning_rate": 1.526013918255836e-05,
      "loss": 0.7189,
      "step": 258
    },
    {
      "epoch": 0.21356421356421357,
      "grad_norm": 0.3174242377281189,
      "learning_rate": 1.4553965519615723e-05,
      "loss": 0.6901,
      "step": 259
    },
    {
      "epoch": 0.21438878581735724,
      "grad_norm": 0.3104889392852783,
      "learning_rate": 1.3863687049356464e-05,
      "loss": 0.6771,
      "step": 260
    },
    {
      "epoch": 0.21521335807050093,
      "grad_norm": 0.3152613043785095,
      "learning_rate": 1.3189384779019535e-05,
      "loss": 0.7524,
      "step": 261
    },
    {
      "epoch": 0.2160379303236446,
      "grad_norm": 0.32586848735809326,
      "learning_rate": 1.25311378409661e-05,
      "loss": 0.733,
      "step": 262
    },
    {
      "epoch": 0.2168625025767883,
      "grad_norm": 0.30279600620269775,
      "learning_rate": 1.1889023483392879e-05,
      "loss": 0.626,
      "step": 263
    },
    {
      "epoch": 0.21768707482993196,
      "grad_norm": 0.31115543842315674,
      "learning_rate": 1.1263117061266675e-05,
      "loss": 0.6681,
      "step": 264
    },
    {
      "epoch": 0.21851164708307566,
      "grad_norm": 0.31493860483169556,
      "learning_rate": 1.0653492027481286e-05,
      "loss": 0.6506,
      "step": 265
    },
    {
      "epoch": 0.21933621933621933,
      "grad_norm": 0.3196756839752197,
      "learning_rate": 1.0060219924237379e-05,
      "loss": 0.712,
      "step": 266
    },
    {
      "epoch": 0.22016079158936303,
      "grad_norm": 0.3244759738445282,
      "learning_rate": 9.48337037464666e-06,
      "loss": 0.7272,
      "step": 267
    },
    {
      "epoch": 0.2209853638425067,
      "grad_norm": 0.30338799953460693,
      "learning_rate": 8.923011074561404e-06,
      "loss": 0.6724,
      "step": 268
    },
    {
      "epoch": 0.2218099360956504,
      "grad_norm": 0.31627827882766724,
      "learning_rate": 8.379207784630004e-06,
      "loss": 0.6856,
      "step": 269
    },
    {
      "epoch": 0.22263450834879406,
      "grad_norm": 0.31490376591682434,
      "learning_rate": 7.852024322579648e-06,
      "loss": 0.6818,
      "step": 270
    },
    {
      "epoch": 0.22345908060193775,
      "grad_norm": 0.3102516829967499,
      "learning_rate": 7.34152255572697e-06,
      "loss": 0.6574,
      "step": 271
    },
    {
      "epoch": 0.22428365285508142,
      "grad_norm": 0.3195677101612091,
      "learning_rate": 6.847762393717782e-06,
      "loss": 0.7313,
      "step": 272
    },
    {
      "epoch": 0.22510822510822512,
      "grad_norm": 0.3145836591720581,
      "learning_rate": 6.370801781496326e-06,
      "loss": 0.7007,
      "step": 273
    },
    {
      "epoch": 0.2259327973613688,
      "grad_norm": 0.3069111704826355,
      "learning_rate": 5.910696692505201e-06,
      "loss": 0.6735,
      "step": 274
    },
    {
      "epoch": 0.22675736961451248,
      "grad_norm": 0.31376153230667114,
      "learning_rate": 5.467501122116563e-06,
      "loss": 0.7041,
      "step": 275
    },
    {
      "epoch": 0.22758194186765615,
      "grad_norm": 0.30935463309288025,
      "learning_rate": 5.0412670812956465e-06,
      "loss": 0.6993,
      "step": 276
    },
    {
      "epoch": 0.22840651412079985,
      "grad_norm": 0.30352145433425903,
      "learning_rate": 4.6320445904969475e-06,
      "loss": 0.674,
      "step": 277
    },
    {
      "epoch": 0.22923108637394352,
      "grad_norm": 0.317184716463089,
      "learning_rate": 4.239881673794165e-06,
      "loss": 0.7051,
      "step": 278
    },
    {
      "epoch": 0.2300556586270872,
      "grad_norm": 0.3102111220359802,
      "learning_rate": 3.864824353244367e-06,
      "loss": 0.6934,
      "step": 279
    },
    {
      "epoch": 0.23088023088023088,
      "grad_norm": 0.31064939498901367,
      "learning_rate": 3.506916643487001e-06,
      "loss": 0.7003,
      "step": 280
    },
    {
      "epoch": 0.23170480313337455,
      "grad_norm": 0.3119175434112549,
      "learning_rate": 3.166200546578718e-06,
      "loss": 0.6886,
      "step": 281
    },
    {
      "epoch": 0.23252937538651824,
      "grad_norm": 0.31088459491729736,
      "learning_rate": 2.8427160470641253e-06,
      "loss": 0.6805,
      "step": 282
    },
    {
      "epoch": 0.2333539476396619,
      "grad_norm": 0.3150649070739746,
      "learning_rate": 2.5365011072835117e-06,
      "loss": 0.7153,
      "step": 283
    },
    {
      "epoch": 0.2341785198928056,
      "grad_norm": 0.31142279505729675,
      "learning_rate": 2.2475916629177415e-06,
      "loss": 0.7158,
      "step": 284
    },
    {
      "epoch": 0.23500309214594928,
      "grad_norm": 0.3094358444213867,
      "learning_rate": 1.9760216187710787e-06,
      "loss": 0.6481,
      "step": 285
    },
    {
      "epoch": 0.23582766439909297,
      "grad_norm": 0.3161930441856384,
      "learning_rate": 1.7218228447922867e-06,
      "loss": 0.7038,
      "step": 286
    },
    {
      "epoch": 0.23665223665223664,
      "grad_norm": 0.3040229082107544,
      "learning_rate": 1.4850251723345196e-06,
      "loss": 0.6384,
      "step": 287
    },
    {
      "epoch": 0.23747680890538034,
      "grad_norm": 0.30395784974098206,
      "learning_rate": 1.2656563906545902e-06,
      "loss": 0.6362,
      "step": 288
    },
    {
      "epoch": 0.238301381158524,
      "grad_norm": 0.30829399824142456,
      "learning_rate": 1.0637422436516274e-06,
      "loss": 0.6538,
      "step": 289
    },
    {
      "epoch": 0.2391259534116677,
      "grad_norm": 0.32518744468688965,
      "learning_rate": 8.793064268460604e-07,
      "loss": 0.7038,
      "step": 290
    },
    {
      "epoch": 0.23995052566481137,
      "grad_norm": 0.3239896893501282,
      "learning_rate": 7.123705845987093e-07,
      "loss": 0.6706,
      "step": 291
    },
    {
      "epoch": 0.24077509791795507,
      "grad_norm": 0.31476303935050964,
      "learning_rate": 5.629543075708176e-07,
      "loss": 0.6519,
      "step": 292
    },
    {
      "epoch": 0.24159967017109873,
      "grad_norm": 0.3170252740383148,
      "learning_rate": 4.310751304249738e-07,
      "loss": 0.6642,
      "step": 293
    },
    {
      "epoch": 0.24242424242424243,
      "grad_norm": 0.31761467456817627,
      "learning_rate": 3.167485297673411e-07,
      "loss": 0.6443,
      "step": 294
    },
    {
      "epoch": 0.2432488146773861,
      "grad_norm": 0.32578980922698975,
      "learning_rate": 2.1998792233142714e-07,
      "loss": 0.7081,
      "step": 295
    },
    {
      "epoch": 0.2440733869305298,
      "grad_norm": 0.32927650213241577,
      "learning_rate": 1.4080466340349316e-07,
      "loss": 0.6772,
      "step": 296
    },
    {
      "epoch": 0.24489795918367346,
      "grad_norm": 0.3357813358306885,
      "learning_rate": 7.92080454900701e-08,
      "loss": 0.6857,
      "step": 297
    },
    {
      "epoch": 0.24572253143681716,
      "grad_norm": 0.3505016565322876,
      "learning_rate": 3.5205297227380855e-08,
      "loss": 0.7116,
      "step": 298
    },
    {
      "epoch": 0.24654710368996083,
      "grad_norm": 0.37650617957115173,
      "learning_rate": 8.801582533035644e-09,
      "loss": 0.6173,
      "step": 299
    },
    {
      "epoch": 0.24737167594310452,
      "grad_norm": 0.5433653593063354,
      "learning_rate": 0.0,
      "loss": 0.6145,
      "step": 300
    },
    {
      "epoch": 0.24737167594310452,
      "eval_loss": 0.6729200482368469,
      "eval_runtime": 42.7292,
      "eval_samples_per_second": 9.268,
      "eval_steps_per_second": 9.268,
      "step": 300
    }
  ],
  "logging_steps": 1,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.406657706577101e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}