|
{ |
|
"best_metric": 0.4301721155643463, |
|
"best_model_checkpoint": "./vit-skin-demo-v1/checkpoint-1200", |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 1284, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019859813084112152, |
|
"loss": 1.3761, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019704049844236762, |
|
"loss": 0.9425, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019548286604361372, |
|
"loss": 0.9102, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019392523364485982, |
|
"loss": 0.8726, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019236760124610595, |
|
"loss": 0.8659, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019080996884735202, |
|
"loss": 0.8507, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00018925233644859814, |
|
"loss": 0.8446, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00018769470404984424, |
|
"loss": 0.941, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018613707165109034, |
|
"loss": 0.7384, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00018457943925233644, |
|
"loss": 0.7377, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.7553058676654182, |
|
"eval_loss": 0.7304826974868774, |
|
"eval_runtime": 19.4854, |
|
"eval_samples_per_second": 82.215, |
|
"eval_steps_per_second": 10.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00018302180685358257, |
|
"loss": 0.7471, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018146417445482867, |
|
"loss": 0.6848, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00017990654205607477, |
|
"loss": 0.7872, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00017834890965732087, |
|
"loss": 0.7769, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.000176791277258567, |
|
"loss": 0.7049, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00017523364485981307, |
|
"loss": 0.4937, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001736760124610592, |
|
"loss": 0.7708, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001721183800623053, |
|
"loss": 0.891, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00017056074766355142, |
|
"loss": 0.7497, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001690031152647975, |
|
"loss": 0.8988, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.7540574282147315, |
|
"eval_loss": 0.6799274682998657, |
|
"eval_runtime": 19.407, |
|
"eval_samples_per_second": 82.548, |
|
"eval_steps_per_second": 10.357, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00016744548286604362, |
|
"loss": 0.6315, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00016588785046728972, |
|
"loss": 0.8089, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00016433021806853582, |
|
"loss": 0.6893, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00016277258566978192, |
|
"loss": 0.7498, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016121495327102805, |
|
"loss": 0.8541, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00015965732087227415, |
|
"loss": 0.6608, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00015809968847352025, |
|
"loss": 0.6824, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00015654205607476635, |
|
"loss": 0.7043, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015498442367601248, |
|
"loss": 0.6091, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00015342679127725855, |
|
"loss": 0.7157, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.7771535580524345, |
|
"eval_loss": 0.6038566827774048, |
|
"eval_runtime": 19.6645, |
|
"eval_samples_per_second": 81.467, |
|
"eval_steps_per_second": 10.221, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015186915887850468, |
|
"loss": 0.517, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015031152647975078, |
|
"loss": 0.6309, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001487538940809969, |
|
"loss": 0.7726, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.000147196261682243, |
|
"loss": 0.6196, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0001456386292834891, |
|
"loss": 0.4591, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014408099688473523, |
|
"loss": 0.5935, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001425233644859813, |
|
"loss": 0.5606, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014096573208722743, |
|
"loss": 0.5984, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00013940809968847353, |
|
"loss": 0.5357, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013785046728971963, |
|
"loss": 0.5569, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.7578027465667915, |
|
"eval_loss": 0.6506312489509583, |
|
"eval_runtime": 18.5005, |
|
"eval_samples_per_second": 86.592, |
|
"eval_steps_per_second": 10.865, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00013629283489096573, |
|
"loss": 0.5707, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00013473520249221186, |
|
"loss": 0.5719, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00013317757009345796, |
|
"loss": 0.5915, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00013161993769470406, |
|
"loss": 0.5368, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00013006230529595016, |
|
"loss": 0.6008, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00012850467289719628, |
|
"loss": 0.5129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00012694704049844238, |
|
"loss": 0.4853, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00012538940809968848, |
|
"loss": 0.6041, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00012383177570093458, |
|
"loss": 0.701, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001222741433021807, |
|
"loss": 0.5342, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.7846441947565543, |
|
"eval_loss": 0.5928722620010376, |
|
"eval_runtime": 19.6731, |
|
"eval_samples_per_second": 81.431, |
|
"eval_steps_per_second": 10.217, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001207165109034268, |
|
"loss": 0.6391, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00011915887850467291, |
|
"loss": 0.5824, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00011760124610591901, |
|
"loss": 0.5496, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00011604361370716512, |
|
"loss": 0.709, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00011448598130841121, |
|
"loss": 0.6867, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00011292834890965734, |
|
"loss": 0.4701, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00011137071651090342, |
|
"loss": 0.6941, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00010981308411214955, |
|
"loss": 0.5546, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00010825545171339564, |
|
"loss": 0.3854, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00010669781931464175, |
|
"loss": 0.6498, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.7952559300873908, |
|
"eval_loss": 0.5553368926048279, |
|
"eval_runtime": 18.8966, |
|
"eval_samples_per_second": 84.777, |
|
"eval_steps_per_second": 10.637, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00010514018691588785, |
|
"loss": 0.5885, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010358255451713396, |
|
"loss": 0.5098, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010202492211838006, |
|
"loss": 0.5097, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010046728971962618, |
|
"loss": 0.5651, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.890965732087229e-05, |
|
"loss": 0.3913, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.735202492211839e-05, |
|
"loss": 0.5351, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.579439252336449e-05, |
|
"loss": 0.3227, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.42367601246106e-05, |
|
"loss": 0.4168, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.26791277258567e-05, |
|
"loss": 0.4675, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.112149532710282e-05, |
|
"loss": 0.4956, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_accuracy": 0.7921348314606742, |
|
"eval_loss": 0.5429254174232483, |
|
"eval_runtime": 18.9872, |
|
"eval_samples_per_second": 84.373, |
|
"eval_steps_per_second": 10.586, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.956386292834892e-05, |
|
"loss": 0.4092, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.800623052959502e-05, |
|
"loss": 0.4268, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.644859813084113e-05, |
|
"loss": 0.4388, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.489096573208723e-05, |
|
"loss": 0.3978, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.5106, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 8.177570093457944e-05, |
|
"loss": 0.4768, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 8.021806853582556e-05, |
|
"loss": 0.5389, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.866043613707166e-05, |
|
"loss": 0.4351, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.710280373831776e-05, |
|
"loss": 0.4425, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.554517133956387e-05, |
|
"loss": 0.5216, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_accuracy": 0.8302122347066168, |
|
"eval_loss": 0.4704440236091614, |
|
"eval_runtime": 18.9227, |
|
"eval_samples_per_second": 84.66, |
|
"eval_steps_per_second": 10.622, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.398753894080997e-05, |
|
"loss": 0.4263, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.242990654205608e-05, |
|
"loss": 0.346, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.087227414330218e-05, |
|
"loss": 0.4664, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.93146417445483e-05, |
|
"loss": 0.3724, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.77570093457944e-05, |
|
"loss": 0.4354, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.61993769470405e-05, |
|
"loss": 0.534, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.464174454828661e-05, |
|
"loss": 0.5173, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.308411214953271e-05, |
|
"loss": 0.4208, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.152647975077882e-05, |
|
"loss": 0.4199, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.9968847352024924e-05, |
|
"loss": 0.3468, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.83270911360799, |
|
"eval_loss": 0.46694162487983704, |
|
"eval_runtime": 19.215, |
|
"eval_samples_per_second": 83.372, |
|
"eval_steps_per_second": 10.461, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.841121495327103e-05, |
|
"loss": 0.3722, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.685358255451714e-05, |
|
"loss": 0.3283, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.5295950155763244e-05, |
|
"loss": 0.4791, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.373831775700935e-05, |
|
"loss": 0.3859, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.218068535825546e-05, |
|
"loss": 0.2583, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.062305295950156e-05, |
|
"loss": 0.318, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.9065420560747664e-05, |
|
"loss": 0.3943, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.750778816199377e-05, |
|
"loss": 0.2449, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.595015576323988e-05, |
|
"loss": 0.204, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.4392523364485984e-05, |
|
"loss": 0.4862, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.8420724094881398, |
|
"eval_loss": 0.4615306556224823, |
|
"eval_runtime": 19.2799, |
|
"eval_samples_per_second": 83.092, |
|
"eval_steps_per_second": 10.425, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.283489096573209e-05, |
|
"loss": 0.2649, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.12772585669782e-05, |
|
"loss": 0.3136, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.97196261682243e-05, |
|
"loss": 0.2549, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.8161993769470404e-05, |
|
"loss": 0.2944, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.660436137071651e-05, |
|
"loss": 0.3809, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.504672897196262e-05, |
|
"loss": 0.2504, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3489096573208724e-05, |
|
"loss": 0.4471, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.193146417445483e-05, |
|
"loss": 0.2706, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.0373831775700934e-05, |
|
"loss": 0.2495, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.881619937694704e-05, |
|
"loss": 0.4018, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_accuracy": 0.8458177278401997, |
|
"eval_loss": 0.4525544345378876, |
|
"eval_runtime": 18.7511, |
|
"eval_samples_per_second": 85.435, |
|
"eval_steps_per_second": 10.719, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.7258566978193147e-05, |
|
"loss": 0.2398, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 2.570093457943925e-05, |
|
"loss": 0.3493, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.414330218068536e-05, |
|
"loss": 0.1811, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.2585669781931463e-05, |
|
"loss": 0.2008, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.102803738317757e-05, |
|
"loss": 0.2805, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.9470404984423677e-05, |
|
"loss": 0.2142, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.7912772585669783e-05, |
|
"loss": 0.2079, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.6355140186915887e-05, |
|
"loss": 0.3097, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.4797507788161993e-05, |
|
"loss": 0.2846, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.3239875389408098e-05, |
|
"loss": 0.302, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.8558052434456929, |
|
"eval_loss": 0.4301721155643463, |
|
"eval_runtime": 19.5305, |
|
"eval_samples_per_second": 82.026, |
|
"eval_steps_per_second": 10.292, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.1682242990654207e-05, |
|
"loss": 0.3489, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.0124610591900312e-05, |
|
"loss": 0.3015, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 8.566978193146417e-06, |
|
"loss": 0.231, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 7.009345794392523e-06, |
|
"loss": 0.192, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 5.451713395638629e-06, |
|
"loss": 0.2904, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.894080996884735e-06, |
|
"loss": 0.412, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"loss": 0.2304, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 7.788161993769471e-07, |
|
"loss": 0.293, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1284, |
|
"total_flos": 1.5895869226063626e+18, |
|
"train_loss": 0.5172773925036285, |
|
"train_runtime": 727.4962, |
|
"train_samples_per_second": 28.195, |
|
"train_steps_per_second": 1.765 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1284, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 1.5895869226063626e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|