|
{ |
|
"best_metric": 0.0002610796655062586, |
|
"best_model_checkpoint": "convnext-base-384-22k-1k-Kontur-competition-1.3K/checkpoint-135", |
|
"epoch": 14.210526315789474, |
|
"eval_steps": 500, |
|
"global_step": 135, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.5273232460021973, |
|
"eval_runtime": 5.3966, |
|
"eval_samples_per_second": 24.645, |
|
"eval_steps_per_second": 0.927, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 14.016679763793945, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.6611, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.15177780389785767, |
|
"eval_runtime": 4.4294, |
|
"eval_samples_per_second": 30.027, |
|
"eval_steps_per_second": 1.129, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 3.7719967365264893, |
|
"learning_rate": 4.75206611570248e-05, |
|
"loss": 0.2686, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 0.026649044826626778, |
|
"eval_runtime": 4.3155, |
|
"eval_samples_per_second": 30.819, |
|
"eval_steps_per_second": 1.159, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 9.589371681213379, |
|
"learning_rate": 4.338842975206612e-05, |
|
"loss": 0.0899, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.00659002223983407, |
|
"eval_runtime": 4.3662, |
|
"eval_samples_per_second": 30.461, |
|
"eval_steps_per_second": 1.145, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"grad_norm": 24.757675170898438, |
|
"learning_rate": 3.925619834710744e-05, |
|
"loss": 0.0379, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 0.002536825370043516, |
|
"eval_runtime": 4.318, |
|
"eval_samples_per_second": 30.802, |
|
"eval_steps_per_second": 1.158, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"grad_norm": 0.6816350221633911, |
|
"learning_rate": 3.512396694214876e-05, |
|
"loss": 0.0202, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.0019522847142070532, |
|
"eval_runtime": 4.4042, |
|
"eval_samples_per_second": 30.199, |
|
"eval_steps_per_second": 1.135, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 0.10703104734420776, |
|
"learning_rate": 3.099173553719008e-05, |
|
"loss": 0.0048, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_loss": 0.00098791706841439, |
|
"eval_runtime": 4.3638, |
|
"eval_samples_per_second": 30.478, |
|
"eval_steps_per_second": 1.146, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 4.492315292358398, |
|
"learning_rate": 2.6859504132231405e-05, |
|
"loss": 0.0056, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.001134931342676282, |
|
"eval_runtime": 4.3514, |
|
"eval_samples_per_second": 30.565, |
|
"eval_steps_per_second": 1.149, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"grad_norm": 0.05526771396398544, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 0.0011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_loss": 0.0004670162743423134, |
|
"eval_runtime": 4.379, |
|
"eval_samples_per_second": 30.372, |
|
"eval_steps_per_second": 1.142, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"grad_norm": 2.763624906539917, |
|
"learning_rate": 1.859504132231405e-05, |
|
"loss": 0.0017, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.0014447210123762488, |
|
"eval_runtime": 4.3565, |
|
"eval_samples_per_second": 30.529, |
|
"eval_steps_per_second": 1.148, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"grad_norm": 0.29414886236190796, |
|
"learning_rate": 1.4462809917355372e-05, |
|
"loss": 0.0076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_loss": 0.0004167805891484022, |
|
"eval_runtime": 4.5875, |
|
"eval_samples_per_second": 28.992, |
|
"eval_steps_per_second": 1.09, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"grad_norm": 0.02740568295121193, |
|
"learning_rate": 1.0330578512396695e-05, |
|
"loss": 0.0018, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0003142206114716828, |
|
"eval_runtime": 4.7253, |
|
"eval_samples_per_second": 28.146, |
|
"eval_steps_per_second": 1.058, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"grad_norm": 0.06253942847251892, |
|
"learning_rate": 6.198347107438017e-06, |
|
"loss": 0.0027, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_loss": 0.0002625222550705075, |
|
"eval_runtime": 4.4194, |
|
"eval_samples_per_second": 30.094, |
|
"eval_steps_per_second": 1.131, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"grad_norm": 0.3781050741672516, |
|
"learning_rate": 2.066115702479339e-06, |
|
"loss": 0.0008, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.00026170219643972814, |
|
"eval_runtime": 4.4435, |
|
"eval_samples_per_second": 29.931, |
|
"eval_steps_per_second": 1.125, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"eval_loss": 0.0002610796655062586, |
|
"eval_runtime": 4.4227, |
|
"eval_samples_per_second": 30.072, |
|
"eval_steps_per_second": 1.131, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"step": 135, |
|
"total_flos": 3.9447179555061105e+18, |
|
"train_loss": 0.08187244446534249, |
|
"train_runtime": 1269.7055, |
|
"train_samples_per_second": 14.106, |
|
"train_steps_per_second": 0.106 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 135, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 3.9447179555061105e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|