|
{ |
|
"best_metric": 0.9793991416309012, |
|
"best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-255", |
|
"epoch": 9.882352941176471, |
|
"eval_steps": 500, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.9944203495979309, |
|
"learning_rate": 0.004920634920634921, |
|
"loss": 0.6411, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.986366868019104, |
|
"learning_rate": 0.004841269841269842, |
|
"loss": 0.3966, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.2478023767471313, |
|
"learning_rate": 0.0047619047619047615, |
|
"loss": 0.2835, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.9372655749320984, |
|
"learning_rate": 0.004682539682539683, |
|
"loss": 0.2194, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.834052562713623, |
|
"learning_rate": 0.004603174603174603, |
|
"loss": 0.4268, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.8572702407836914, |
|
"learning_rate": 0.004523809523809524, |
|
"loss": 0.3305, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.936480686695279, |
|
"eval_f1": 0.9119407558733402, |
|
"eval_loss": 0.16002865135669708, |
|
"eval_precision": 0.9478312410980476, |
|
"eval_recall": 0.886801607677731, |
|
"eval_runtime": 18.303, |
|
"eval_samples_per_second": 63.651, |
|
"eval_steps_per_second": 3.988, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.092854619026184, |
|
"learning_rate": 0.0044444444444444444, |
|
"loss": 0.2775, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.632826328277588, |
|
"learning_rate": 0.004365079365079365, |
|
"loss": 0.2432, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.9421872496604919, |
|
"learning_rate": 0.004285714285714286, |
|
"loss": 0.2564, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 3.025440216064453, |
|
"learning_rate": 0.004206349206349207, |
|
"loss": 0.2726, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.8582188487052917, |
|
"learning_rate": 0.0041269841269841265, |
|
"loss": 0.2958, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.093877911567688, |
|
"learning_rate": 0.004047619047619048, |
|
"loss": 0.2335, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.9313304721030042, |
|
"eval_f1": 0.9166446055107573, |
|
"eval_loss": 0.15517598390579224, |
|
"eval_precision": 0.8967545322648443, |
|
"eval_recall": 0.9471582920716426, |
|
"eval_runtime": 18.7183, |
|
"eval_samples_per_second": 62.239, |
|
"eval_steps_per_second": 3.9, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 1.588196873664856, |
|
"learning_rate": 0.003968253968253968, |
|
"loss": 0.1984, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.8592113256454468, |
|
"learning_rate": 0.003896825396825397, |
|
"loss": 0.2741, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 1.1290347576141357, |
|
"learning_rate": 0.003817460317460317, |
|
"loss": 0.2253, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.8879281878471375, |
|
"learning_rate": 0.0037380952380952383, |
|
"loss": 0.2726, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.8666319847106934, |
|
"learning_rate": 0.0036587301587301586, |
|
"loss": 0.2226, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 1.2298210859298706, |
|
"learning_rate": 0.0035793650793650793, |
|
"loss": 0.1967, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 0.7905568480491638, |
|
"learning_rate": 0.0034999999999999996, |
|
"loss": 0.1977, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9733905579399141, |
|
"eval_f1": 0.9659005824707372, |
|
"eval_loss": 0.08546662330627441, |
|
"eval_precision": 0.9607843137254902, |
|
"eval_recall": 0.9713575096277278, |
|
"eval_runtime": 18.7005, |
|
"eval_samples_per_second": 62.298, |
|
"eval_steps_per_second": 3.904, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.9821394681930542, |
|
"learning_rate": 0.003420634920634921, |
|
"loss": 0.2158, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.6182402968406677, |
|
"learning_rate": 0.003341269841269841, |
|
"loss": 0.1528, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.6537771821022034, |
|
"learning_rate": 0.003261904761904762, |
|
"loss": 0.2038, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 1.0976545810699463, |
|
"learning_rate": 0.0031825396825396826, |
|
"loss": 0.217, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 0.581605076789856, |
|
"learning_rate": 0.0031031746031746034, |
|
"loss": 0.2044, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.8219888806343079, |
|
"learning_rate": 0.0030238095238095237, |
|
"loss": 0.1746, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9793991416309012, |
|
"eval_f1": 0.972945356587167, |
|
"eval_loss": 0.0870010182261467, |
|
"eval_precision": 0.9794167490467448, |
|
"eval_recall": 0.9669104162846139, |
|
"eval_runtime": 18.7359, |
|
"eval_samples_per_second": 62.18, |
|
"eval_steps_per_second": 3.896, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.45729899406433105, |
|
"learning_rate": 0.0029444444444444444, |
|
"loss": 0.1785, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.9842467904090881, |
|
"learning_rate": 0.002865079365079365, |
|
"loss": 0.1507, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 2.326084852218628, |
|
"learning_rate": 0.002785714285714286, |
|
"loss": 0.2188, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 0.7227071523666382, |
|
"learning_rate": 0.002706349206349206, |
|
"loss": 0.1853, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 1.1356016397476196, |
|
"learning_rate": 0.002626984126984127, |
|
"loss": 0.1671, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.6211657524108887, |
|
"learning_rate": 0.0025476190476190477, |
|
"loss": 0.1797, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9699570815450643, |
|
"eval_f1": 0.961658208434536, |
|
"eval_loss": 0.0828637108206749, |
|
"eval_precision": 0.95490488153731, |
|
"eval_recall": 0.969034629256067, |
|
"eval_runtime": 18.2407, |
|
"eval_samples_per_second": 63.868, |
|
"eval_steps_per_second": 4.002, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.3646533787250519, |
|
"learning_rate": 0.0024682539682539684, |
|
"loss": 0.1636, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 0.5121908187866211, |
|
"learning_rate": 0.002388888888888889, |
|
"loss": 0.1463, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 0.34603381156921387, |
|
"learning_rate": 0.0023095238095238095, |
|
"loss": 0.1284, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"grad_norm": 1.5245040655136108, |
|
"learning_rate": 0.0022301587301587302, |
|
"loss": 0.1307, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 1.0758203268051147, |
|
"learning_rate": 0.002150793650793651, |
|
"loss": 0.1912, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 0.9437044858932495, |
|
"learning_rate": 0.0020714285714285717, |
|
"loss": 0.1942, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 0.6630730628967285, |
|
"learning_rate": 0.001992063492063492, |
|
"loss": 0.1436, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9708154506437768, |
|
"eval_f1": 0.9627915802446869, |
|
"eval_loss": 0.0797128826379776, |
|
"eval_precision": 0.9556120562130177, |
|
"eval_recall": 0.9706793660981723, |
|
"eval_runtime": 18.3826, |
|
"eval_samples_per_second": 63.375, |
|
"eval_steps_per_second": 3.971, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.5699043869972229, |
|
"learning_rate": 0.0019126984126984126, |
|
"loss": 0.1704, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"grad_norm": 0.6287882328033447, |
|
"learning_rate": 0.0018333333333333333, |
|
"loss": 0.1034, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"grad_norm": 0.4099302589893341, |
|
"learning_rate": 0.0017539682539682538, |
|
"loss": 0.1633, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 0.24668000638484955, |
|
"learning_rate": 0.0016746031746031746, |
|
"loss": 0.1477, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 0.6959215998649597, |
|
"learning_rate": 0.001595238095238095, |
|
"loss": 0.1578, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 0.4429934620857239, |
|
"learning_rate": 0.0015158730158730158, |
|
"loss": 0.1632, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9699570815450643, |
|
"eval_f1": 0.9621164611060785, |
|
"eval_loss": 0.08162170648574829, |
|
"eval_precision": 0.9507741239032943, |
|
"eval_recall": 0.9754187297512072, |
|
"eval_runtime": 18.5501, |
|
"eval_samples_per_second": 62.803, |
|
"eval_steps_per_second": 3.935, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 0.3358498513698578, |
|
"learning_rate": 0.0014365079365079364, |
|
"loss": 0.1628, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"grad_norm": 1.001493215560913, |
|
"learning_rate": 0.0013571428571428571, |
|
"loss": 0.1495, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 1.0969635248184204, |
|
"learning_rate": 0.0012777777777777776, |
|
"loss": 0.1972, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"grad_norm": 0.4329684376716614, |
|
"learning_rate": 0.0011984126984126984, |
|
"loss": 0.1538, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"grad_norm": 0.426087886095047, |
|
"learning_rate": 0.0011190476190476191, |
|
"loss": 0.1291, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.24835826456546783, |
|
"learning_rate": 0.0010396825396825396, |
|
"loss": 0.1301, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.3314363956451416, |
|
"learning_rate": 0.0009603174603174604, |
|
"loss": 0.1125, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9613733905579399, |
|
"eval_f1": 0.9519463889441405, |
|
"eval_loss": 0.10071194916963577, |
|
"eval_precision": 0.9364616472251679, |
|
"eval_recall": 0.9717395623204352, |
|
"eval_runtime": 18.4952, |
|
"eval_samples_per_second": 62.989, |
|
"eval_steps_per_second": 3.947, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.4465714693069458, |
|
"learning_rate": 0.000880952380952381, |
|
"loss": 0.1279, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 0.7344756126403809, |
|
"learning_rate": 0.0008015873015873017, |
|
"loss": 0.1408, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"grad_norm": 2.2782132625579834, |
|
"learning_rate": 0.0007222222222222222, |
|
"loss": 0.134, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"grad_norm": 0.8408999443054199, |
|
"learning_rate": 0.0006428571428571428, |
|
"loss": 0.1193, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 0.4357975721359253, |
|
"learning_rate": 0.0005634920634920636, |
|
"loss": 0.114, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"grad_norm": 0.6790524125099182, |
|
"learning_rate": 0.00048412698412698415, |
|
"loss": 0.1076, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9690987124463519, |
|
"eval_f1": 0.9612241124260354, |
|
"eval_loss": 0.09004171937704086, |
|
"eval_precision": 0.9481984892871503, |
|
"eval_recall": 0.9769660431566722, |
|
"eval_runtime": 18.3674, |
|
"eval_samples_per_second": 63.427, |
|
"eval_steps_per_second": 3.974, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.38498663902282715, |
|
"learning_rate": 0.0004047619047619048, |
|
"loss": 0.1174, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 0.7055544853210449, |
|
"learning_rate": 0.0003253968253968254, |
|
"loss": 0.0951, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.8157325387001038, |
|
"learning_rate": 0.00024603174603174605, |
|
"loss": 0.1397, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"grad_norm": 0.6511685252189636, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.1296, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"grad_norm": 0.5405935049057007, |
|
"learning_rate": 8.730158730158731e-05, |
|
"loss": 0.1028, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"grad_norm": 0.472501665353775, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.1188, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.9622317596566523, |
|
"eval_f1": 0.9529697156530292, |
|
"eval_loss": 0.10635051876306534, |
|
"eval_precision": 0.9377354615755036, |
|
"eval_recall": 0.9723202824133504, |
|
"eval_runtime": 18.4162, |
|
"eval_samples_per_second": 63.26, |
|
"eval_steps_per_second": 3.964, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"step": 630, |
|
"total_flos": 7.138406630605308e+18, |
|
"train_loss": 0.19363035304205758, |
|
"train_runtime": 1305.2945, |
|
"train_samples_per_second": 31.234, |
|
"train_steps_per_second": 0.483 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 7.138406630605308e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|