|
{ |
|
"best_metric": 0.4636478126049042, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pawsx/distillbeto/epochs_4_bs_32_lr_5e-5/checkpoint-2100", |
|
"epoch": 4.0, |
|
"global_step": 6176, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.5805000066757202, |
|
"eval_loss": 0.6699569225311279, |
|
"eval_runtime": 0.7381, |
|
"eval_samples_per_second": 2709.802, |
|
"eval_steps_per_second": 85.359, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.596826424870467e-05, |
|
"loss": 0.6641, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.6809999942779541, |
|
"eval_loss": 0.5914173722267151, |
|
"eval_runtime": 0.7361, |
|
"eval_samples_per_second": 2717.087, |
|
"eval_steps_per_second": 85.588, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7275000214576721, |
|
"eval_loss": 0.5336065888404846, |
|
"eval_runtime": 0.7255, |
|
"eval_samples_per_second": 2756.65, |
|
"eval_steps_per_second": 86.834, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.192033678756477e-05, |
|
"loss": 0.5478, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.746999979019165, |
|
"eval_loss": 0.5002644062042236, |
|
"eval_runtime": 0.7294, |
|
"eval_samples_per_second": 2741.855, |
|
"eval_steps_per_second": 86.368, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.787240932642487e-05, |
|
"loss": 0.4937, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.7515000104904175, |
|
"eval_loss": 0.493168443441391, |
|
"eval_runtime": 0.7379, |
|
"eval_samples_per_second": 2710.282, |
|
"eval_steps_per_second": 85.374, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.7570000290870667, |
|
"eval_loss": 0.5791745781898499, |
|
"eval_runtime": 0.7412, |
|
"eval_samples_per_second": 2698.347, |
|
"eval_steps_per_second": 84.998, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.3832577720207255e-05, |
|
"loss": 0.3997, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7749999761581421, |
|
"eval_loss": 0.4636478126049042, |
|
"eval_runtime": 0.7343, |
|
"eval_samples_per_second": 2723.768, |
|
"eval_steps_per_second": 85.799, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.7730000019073486, |
|
"eval_loss": 0.4842735230922699, |
|
"eval_runtime": 0.728, |
|
"eval_samples_per_second": 2747.191, |
|
"eval_steps_per_second": 86.537, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.978465025906736e-05, |
|
"loss": 0.3796, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.7789999842643738, |
|
"eval_loss": 0.4961821436882019, |
|
"eval_runtime": 0.7387, |
|
"eval_samples_per_second": 2707.452, |
|
"eval_steps_per_second": 85.285, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.5736722797927466e-05, |
|
"loss": 0.3598, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.7879999876022339, |
|
"eval_loss": 0.46712586283683777, |
|
"eval_runtime": 0.7391, |
|
"eval_samples_per_second": 2705.908, |
|
"eval_steps_per_second": 85.236, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7885000109672546, |
|
"eval_loss": 0.5191779732704163, |
|
"eval_runtime": 0.7313, |
|
"eval_samples_per_second": 2734.943, |
|
"eval_steps_per_second": 86.151, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.1688795336787565e-05, |
|
"loss": 0.2884, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.7925000190734863, |
|
"eval_loss": 0.5480718612670898, |
|
"eval_runtime": 0.7293, |
|
"eval_samples_per_second": 2742.348, |
|
"eval_steps_per_second": 86.384, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.7735000252723694, |
|
"eval_loss": 0.5514509081840515, |
|
"eval_runtime": 0.735, |
|
"eval_samples_per_second": 2721.166, |
|
"eval_steps_per_second": 85.717, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.7640867875647667e-05, |
|
"loss": 0.27, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.7879999876022339, |
|
"eval_loss": 0.5070611238479614, |
|
"eval_runtime": 0.7377, |
|
"eval_samples_per_second": 2710.994, |
|
"eval_steps_per_second": 85.396, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.3601036269430053e-05, |
|
"loss": 0.2665, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.7875000238418579, |
|
"eval_loss": 0.5371026992797852, |
|
"eval_runtime": 0.7291, |
|
"eval_samples_per_second": 2743.286, |
|
"eval_steps_per_second": 86.413, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_accuracy": 0.7910000085830688, |
|
"eval_loss": 0.624961793422699, |
|
"eval_runtime": 0.7282, |
|
"eval_samples_per_second": 2746.606, |
|
"eval_steps_per_second": 86.518, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 9.553108808290157e-06, |
|
"loss": 0.2171, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.7940000295639038, |
|
"eval_loss": 0.5903840065002441, |
|
"eval_runtime": 0.7375, |
|
"eval_samples_per_second": 2712.014, |
|
"eval_steps_per_second": 85.428, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.7850000262260437, |
|
"eval_loss": 0.598899781703949, |
|
"eval_runtime": 0.7383, |
|
"eval_samples_per_second": 2709.079, |
|
"eval_steps_per_second": 85.336, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5.50518134715026e-06, |
|
"loss": 0.2083, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.7910000085830688, |
|
"eval_loss": 0.5919374227523804, |
|
"eval_runtime": 0.7401, |
|
"eval_samples_per_second": 2702.433, |
|
"eval_steps_per_second": 85.127, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.4653497409326426e-06, |
|
"loss": 0.1906, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.7864999771118164, |
|
"eval_loss": 0.6115372180938721, |
|
"eval_runtime": 0.7292, |
|
"eval_samples_per_second": 2742.712, |
|
"eval_steps_per_second": 86.395, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 6176, |
|
"total_flos": 4946498250063360.0, |
|
"train_loss": 0.35224579220608726, |
|
"train_runtime": 2738.1778, |
|
"train_samples_per_second": 72.166, |
|
"train_steps_per_second": 2.256 |
|
} |
|
], |
|
"max_steps": 6176, |
|
"num_train_epochs": 4, |
|
"total_flos": 4946498250063360.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|