{ "best_metric": 0.4301721155643463, "best_model_checkpoint": "./vit-skin-demo-v1/checkpoint-1200", "epoch": 4.0, "eval_steps": 100, "global_step": 1284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019859813084112152, "loss": 1.3761, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.00019704049844236762, "loss": 0.9425, "step": 20 }, { "epoch": 0.09, "learning_rate": 0.00019548286604361372, "loss": 0.9102, "step": 30 }, { "epoch": 0.12, "learning_rate": 0.00019392523364485982, "loss": 0.8726, "step": 40 }, { "epoch": 0.16, "learning_rate": 0.00019236760124610595, "loss": 0.8659, "step": 50 }, { "epoch": 0.19, "learning_rate": 0.00019080996884735202, "loss": 0.8507, "step": 60 }, { "epoch": 0.22, "learning_rate": 0.00018925233644859814, "loss": 0.8446, "step": 70 }, { "epoch": 0.25, "learning_rate": 0.00018769470404984424, "loss": 0.941, "step": 80 }, { "epoch": 0.28, "learning_rate": 0.00018613707165109034, "loss": 0.7384, "step": 90 }, { "epoch": 0.31, "learning_rate": 0.00018457943925233644, "loss": 0.7377, "step": 100 }, { "epoch": 0.31, "eval_accuracy": 0.7553058676654182, "eval_loss": 0.7304826974868774, "eval_runtime": 19.4854, "eval_samples_per_second": 82.215, "eval_steps_per_second": 10.315, "step": 100 }, { "epoch": 0.34, "learning_rate": 0.00018302180685358257, "loss": 0.7471, "step": 110 }, { "epoch": 0.37, "learning_rate": 0.00018146417445482867, "loss": 0.6848, "step": 120 }, { "epoch": 0.4, "learning_rate": 0.00017990654205607477, "loss": 0.7872, "step": 130 }, { "epoch": 0.44, "learning_rate": 0.00017834890965732087, "loss": 0.7769, "step": 140 }, { "epoch": 0.47, "learning_rate": 0.000176791277258567, "loss": 0.7049, "step": 150 }, { "epoch": 0.5, "learning_rate": 0.00017523364485981307, "loss": 0.4937, "step": 160 }, { "epoch": 0.53, "learning_rate": 0.0001736760124610592, "loss": 0.7708, "step": 170 }, { "epoch": 0.56, "learning_rate": 0.0001721183800623053, "loss": 0.891, "step": 180 }, { "epoch": 0.59, "learning_rate": 0.00017056074766355142, "loss": 0.7497, "step": 190 }, { "epoch": 0.62, "learning_rate": 0.0001690031152647975, "loss": 0.8988, "step": 200 }, { "epoch": 0.62, "eval_accuracy": 0.7540574282147315, "eval_loss": 0.6799274682998657, "eval_runtime": 19.407, "eval_samples_per_second": 82.548, "eval_steps_per_second": 10.357, "step": 200 }, { "epoch": 0.65, "learning_rate": 0.00016744548286604362, "loss": 0.6315, "step": 210 }, { "epoch": 0.69, "learning_rate": 0.00016588785046728972, "loss": 0.8089, "step": 220 }, { "epoch": 0.72, "learning_rate": 0.00016433021806853582, "loss": 0.6893, "step": 230 }, { "epoch": 0.75, "learning_rate": 0.00016277258566978192, "loss": 0.7498, "step": 240 }, { "epoch": 0.78, "learning_rate": 0.00016121495327102805, "loss": 0.8541, "step": 250 }, { "epoch": 0.81, "learning_rate": 0.00015965732087227415, "loss": 0.6608, "step": 260 }, { "epoch": 0.84, "learning_rate": 0.00015809968847352025, "loss": 0.6824, "step": 270 }, { "epoch": 0.87, "learning_rate": 0.00015654205607476635, "loss": 0.7043, "step": 280 }, { "epoch": 0.9, "learning_rate": 0.00015498442367601248, "loss": 0.6091, "step": 290 }, { "epoch": 0.93, "learning_rate": 0.00015342679127725855, "loss": 0.7157, "step": 300 }, { "epoch": 0.93, "eval_accuracy": 0.7771535580524345, "eval_loss": 0.6038566827774048, "eval_runtime": 19.6645, "eval_samples_per_second": 81.467, "eval_steps_per_second": 10.221, "step": 300 }, { "epoch": 0.97, "learning_rate": 0.00015186915887850468, "loss": 0.517, "step": 310 }, { "epoch": 1.0, "learning_rate": 0.00015031152647975078, "loss": 0.6309, "step": 320 }, { "epoch": 1.03, "learning_rate": 0.0001487538940809969, "loss": 0.7726, "step": 330 }, { "epoch": 1.06, "learning_rate": 0.000147196261682243, "loss": 0.6196, "step": 340 }, { "epoch": 1.09, "learning_rate": 0.0001456386292834891, "loss": 0.4591, "step": 350 }, { "epoch": 1.12, "learning_rate": 0.00014408099688473523, "loss": 0.5935, "step": 360 }, { "epoch": 1.15, "learning_rate": 0.0001425233644859813, "loss": 0.5606, "step": 370 }, { "epoch": 1.18, "learning_rate": 0.00014096573208722743, "loss": 0.5984, "step": 380 }, { "epoch": 1.21, "learning_rate": 0.00013940809968847353, "loss": 0.5357, "step": 390 }, { "epoch": 1.25, "learning_rate": 0.00013785046728971963, "loss": 0.5569, "step": 400 }, { "epoch": 1.25, "eval_accuracy": 0.7578027465667915, "eval_loss": 0.6506312489509583, "eval_runtime": 18.5005, "eval_samples_per_second": 86.592, "eval_steps_per_second": 10.865, "step": 400 }, { "epoch": 1.28, "learning_rate": 0.00013629283489096573, "loss": 0.5707, "step": 410 }, { "epoch": 1.31, "learning_rate": 0.00013473520249221186, "loss": 0.5719, "step": 420 }, { "epoch": 1.34, "learning_rate": 0.00013317757009345796, "loss": 0.5915, "step": 430 }, { "epoch": 1.37, "learning_rate": 0.00013161993769470406, "loss": 0.5368, "step": 440 }, { "epoch": 1.4, "learning_rate": 0.00013006230529595016, "loss": 0.6008, "step": 450 }, { "epoch": 1.43, "learning_rate": 0.00012850467289719628, "loss": 0.5129, "step": 460 }, { "epoch": 1.46, "learning_rate": 0.00012694704049844238, "loss": 0.4853, "step": 470 }, { "epoch": 1.5, "learning_rate": 0.00012538940809968848, "loss": 0.6041, "step": 480 }, { "epoch": 1.53, "learning_rate": 0.00012383177570093458, "loss": 0.701, "step": 490 }, { "epoch": 1.56, "learning_rate": 0.0001222741433021807, "loss": 0.5342, "step": 500 }, { "epoch": 1.56, "eval_accuracy": 0.7846441947565543, "eval_loss": 0.5928722620010376, "eval_runtime": 19.6731, "eval_samples_per_second": 81.431, "eval_steps_per_second": 10.217, "step": 500 }, { "epoch": 1.59, "learning_rate": 0.0001207165109034268, "loss": 0.6391, "step": 510 }, { "epoch": 1.62, "learning_rate": 0.00011915887850467291, "loss": 0.5824, "step": 520 }, { "epoch": 1.65, "learning_rate": 0.00011760124610591901, "loss": 0.5496, "step": 530 }, { "epoch": 1.68, "learning_rate": 0.00011604361370716512, "loss": 0.709, "step": 540 }, { "epoch": 1.71, "learning_rate": 0.00011448598130841121, "loss": 0.6867, "step": 550 }, { "epoch": 1.74, "learning_rate": 0.00011292834890965734, "loss": 0.4701, "step": 560 }, { "epoch": 1.78, "learning_rate": 0.00011137071651090342, "loss": 0.6941, "step": 570 }, { "epoch": 1.81, "learning_rate": 0.00010981308411214955, "loss": 0.5546, "step": 580 }, { "epoch": 1.84, "learning_rate": 0.00010825545171339564, "loss": 0.3854, "step": 590 }, { "epoch": 1.87, "learning_rate": 0.00010669781931464175, "loss": 0.6498, "step": 600 }, { "epoch": 1.87, "eval_accuracy": 0.7952559300873908, "eval_loss": 0.5553368926048279, "eval_runtime": 18.8966, "eval_samples_per_second": 84.777, "eval_steps_per_second": 10.637, "step": 600 }, { "epoch": 1.9, "learning_rate": 0.00010514018691588785, "loss": 0.5885, "step": 610 }, { "epoch": 1.93, "learning_rate": 0.00010358255451713396, "loss": 0.5098, "step": 620 }, { "epoch": 1.96, "learning_rate": 0.00010202492211838006, "loss": 0.5097, "step": 630 }, { "epoch": 1.99, "learning_rate": 0.00010046728971962618, "loss": 0.5651, "step": 640 }, { "epoch": 2.02, "learning_rate": 9.890965732087229e-05, "loss": 0.3913, "step": 650 }, { "epoch": 2.06, "learning_rate": 9.735202492211839e-05, "loss": 0.5351, "step": 660 }, { "epoch": 2.09, "learning_rate": 9.579439252336449e-05, "loss": 0.3227, "step": 670 }, { "epoch": 2.12, "learning_rate": 9.42367601246106e-05, "loss": 0.4168, "step": 680 }, { "epoch": 2.15, "learning_rate": 9.26791277258567e-05, "loss": 0.4675, "step": 690 }, { "epoch": 2.18, "learning_rate": 9.112149532710282e-05, "loss": 0.4956, "step": 700 }, { "epoch": 2.18, "eval_accuracy": 0.7921348314606742, "eval_loss": 0.5429254174232483, "eval_runtime": 18.9872, "eval_samples_per_second": 84.373, "eval_steps_per_second": 10.586, "step": 700 }, { "epoch": 2.21, "learning_rate": 8.956386292834892e-05, "loss": 0.4092, "step": 710 }, { "epoch": 2.24, "learning_rate": 8.800623052959502e-05, "loss": 0.4268, "step": 720 }, { "epoch": 2.27, "learning_rate": 8.644859813084113e-05, "loss": 0.4388, "step": 730 }, { "epoch": 2.31, "learning_rate": 8.489096573208723e-05, "loss": 0.3978, "step": 740 }, { "epoch": 2.34, "learning_rate": 8.333333333333334e-05, "loss": 0.5106, "step": 750 }, { "epoch": 2.37, "learning_rate": 8.177570093457944e-05, "loss": 0.4768, "step": 760 }, { "epoch": 2.4, "learning_rate": 8.021806853582556e-05, "loss": 0.5389, "step": 770 }, { "epoch": 2.43, "learning_rate": 7.866043613707166e-05, "loss": 0.4351, "step": 780 }, { "epoch": 2.46, "learning_rate": 7.710280373831776e-05, "loss": 0.4425, "step": 790 }, { "epoch": 2.49, "learning_rate": 7.554517133956387e-05, "loss": 0.5216, "step": 800 }, { "epoch": 2.49, "eval_accuracy": 0.8302122347066168, "eval_loss": 0.4704440236091614, "eval_runtime": 18.9227, "eval_samples_per_second": 84.66, "eval_steps_per_second": 10.622, "step": 800 }, { "epoch": 2.52, "learning_rate": 7.398753894080997e-05, "loss": 0.4263, "step": 810 }, { "epoch": 2.55, "learning_rate": 7.242990654205608e-05, "loss": 0.346, "step": 820 }, { "epoch": 2.59, "learning_rate": 7.087227414330218e-05, "loss": 0.4664, "step": 830 }, { "epoch": 2.62, "learning_rate": 6.93146417445483e-05, "loss": 0.3724, "step": 840 }, { "epoch": 2.65, "learning_rate": 6.77570093457944e-05, "loss": 0.4354, "step": 850 }, { "epoch": 2.68, "learning_rate": 6.61993769470405e-05, "loss": 0.534, "step": 860 }, { "epoch": 2.71, "learning_rate": 6.464174454828661e-05, "loss": 0.5173, "step": 870 }, { "epoch": 2.74, "learning_rate": 6.308411214953271e-05, "loss": 0.4208, "step": 880 }, { "epoch": 2.77, "learning_rate": 6.152647975077882e-05, "loss": 0.4199, "step": 890 }, { "epoch": 2.8, "learning_rate": 5.9968847352024924e-05, "loss": 0.3468, "step": 900 }, { "epoch": 2.8, "eval_accuracy": 0.83270911360799, "eval_loss": 0.46694162487983704, "eval_runtime": 19.215, "eval_samples_per_second": 83.372, "eval_steps_per_second": 10.461, "step": 900 }, { "epoch": 2.83, "learning_rate": 5.841121495327103e-05, "loss": 0.3722, "step": 910 }, { "epoch": 2.87, "learning_rate": 5.685358255451714e-05, "loss": 0.3283, "step": 920 }, { "epoch": 2.9, "learning_rate": 5.5295950155763244e-05, "loss": 0.4791, "step": 930 }, { "epoch": 2.93, "learning_rate": 5.373831775700935e-05, "loss": 0.3859, "step": 940 }, { "epoch": 2.96, "learning_rate": 5.218068535825546e-05, "loss": 0.2583, "step": 950 }, { "epoch": 2.99, "learning_rate": 5.062305295950156e-05, "loss": 0.318, "step": 960 }, { "epoch": 3.02, "learning_rate": 4.9065420560747664e-05, "loss": 0.3943, "step": 970 }, { "epoch": 3.05, "learning_rate": 4.750778816199377e-05, "loss": 0.2449, "step": 980 }, { "epoch": 3.08, "learning_rate": 4.595015576323988e-05, "loss": 0.204, "step": 990 }, { "epoch": 3.12, "learning_rate": 4.4392523364485984e-05, "loss": 0.4862, "step": 1000 }, { "epoch": 3.12, "eval_accuracy": 0.8420724094881398, "eval_loss": 0.4615306556224823, "eval_runtime": 19.2799, "eval_samples_per_second": 83.092, "eval_steps_per_second": 10.425, "step": 1000 }, { "epoch": 3.15, "learning_rate": 4.283489096573209e-05, "loss": 0.2649, "step": 1010 }, { "epoch": 3.18, "learning_rate": 4.12772585669782e-05, "loss": 0.3136, "step": 1020 }, { "epoch": 3.21, "learning_rate": 3.97196261682243e-05, "loss": 0.2549, "step": 1030 }, { "epoch": 3.24, "learning_rate": 3.8161993769470404e-05, "loss": 0.2944, "step": 1040 }, { "epoch": 3.27, "learning_rate": 3.660436137071651e-05, "loss": 0.3809, "step": 1050 }, { "epoch": 3.3, "learning_rate": 3.504672897196262e-05, "loss": 0.2504, "step": 1060 }, { "epoch": 3.33, "learning_rate": 3.3489096573208724e-05, "loss": 0.4471, "step": 1070 }, { "epoch": 3.36, "learning_rate": 3.193146417445483e-05, "loss": 0.2706, "step": 1080 }, { "epoch": 3.4, "learning_rate": 3.0373831775700934e-05, "loss": 0.2495, "step": 1090 }, { "epoch": 3.43, "learning_rate": 2.881619937694704e-05, "loss": 0.4018, "step": 1100 }, { "epoch": 3.43, "eval_accuracy": 0.8458177278401997, "eval_loss": 0.4525544345378876, "eval_runtime": 18.7511, "eval_samples_per_second": 85.435, "eval_steps_per_second": 10.719, "step": 1100 }, { "epoch": 3.46, "learning_rate": 2.7258566978193147e-05, "loss": 0.2398, "step": 1110 }, { "epoch": 3.49, "learning_rate": 2.570093457943925e-05, "loss": 0.3493, "step": 1120 }, { "epoch": 3.52, "learning_rate": 2.414330218068536e-05, "loss": 0.1811, "step": 1130 }, { "epoch": 3.55, "learning_rate": 2.2585669781931463e-05, "loss": 0.2008, "step": 1140 }, { "epoch": 3.58, "learning_rate": 2.102803738317757e-05, "loss": 0.2805, "step": 1150 }, { "epoch": 3.61, "learning_rate": 1.9470404984423677e-05, "loss": 0.2142, "step": 1160 }, { "epoch": 3.64, "learning_rate": 1.7912772585669783e-05, "loss": 0.2079, "step": 1170 }, { "epoch": 3.68, "learning_rate": 1.6355140186915887e-05, "loss": 0.3097, "step": 1180 }, { "epoch": 3.71, "learning_rate": 1.4797507788161993e-05, "loss": 0.2846, "step": 1190 }, { "epoch": 3.74, "learning_rate": 1.3239875389408098e-05, "loss": 0.302, "step": 1200 }, { "epoch": 3.74, "eval_accuracy": 0.8558052434456929, "eval_loss": 0.4301721155643463, "eval_runtime": 19.5305, "eval_samples_per_second": 82.026, "eval_steps_per_second": 10.292, "step": 1200 }, { "epoch": 3.77, "learning_rate": 1.1682242990654207e-05, "loss": 0.3489, "step": 1210 }, { "epoch": 3.8, "learning_rate": 1.0124610591900312e-05, "loss": 0.3015, "step": 1220 }, { "epoch": 3.83, "learning_rate": 8.566978193146417e-06, "loss": 0.231, "step": 1230 }, { "epoch": 3.86, "learning_rate": 7.009345794392523e-06, "loss": 0.192, "step": 1240 }, { "epoch": 3.89, "learning_rate": 5.451713395638629e-06, "loss": 0.2904, "step": 1250 }, { "epoch": 3.93, "learning_rate": 3.894080996884735e-06, "loss": 0.412, "step": 1260 }, { "epoch": 3.96, "learning_rate": 2.3364485981308413e-06, "loss": 0.2304, "step": 1270 }, { "epoch": 3.99, "learning_rate": 7.788161993769471e-07, "loss": 0.293, "step": 1280 }, { "epoch": 4.0, "step": 1284, "total_flos": 1.5895869226063626e+18, "train_loss": 0.5172773925036285, "train_runtime": 727.4962, "train_samples_per_second": 28.195, "train_steps_per_second": 1.765 } ], "logging_steps": 10, "max_steps": 1284, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "total_flos": 1.5895869226063626e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }