robiual-awal's picture
Training in progress, step 200, checkpoint
9a00076 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.08790242830458192,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0004395121415229096,
"eval_loss": 4.948474407196045,
"eval_runtime": 52.4919,
"eval_samples_per_second": 18.269,
"eval_steps_per_second": 9.144,
"step": 1
},
{
"epoch": 0.004395121415229096,
"grad_norm": 11.48950481414795,
"learning_rate": 0.0002,
"loss": 14.3578,
"step": 10
},
{
"epoch": 0.008790242830458192,
"grad_norm": 12.444845199584961,
"learning_rate": 0.0002,
"loss": 4.3853,
"step": 20
},
{
"epoch": 0.013185364245687286,
"grad_norm": 9.680885314941406,
"learning_rate": 0.0002,
"loss": 1.6288,
"step": 30
},
{
"epoch": 0.017580485660916384,
"grad_norm": 4.9109015464782715,
"learning_rate": 0.0002,
"loss": 1.0173,
"step": 40
},
{
"epoch": 0.02197560707614548,
"grad_norm": 15.963685989379883,
"learning_rate": 0.0002,
"loss": 0.5108,
"step": 50
},
{
"epoch": 0.02197560707614548,
"eval_loss": 0.07556916773319244,
"eval_runtime": 52.5274,
"eval_samples_per_second": 18.257,
"eval_steps_per_second": 9.138,
"step": 50
},
{
"epoch": 0.026370728491374573,
"grad_norm": 19.075408935546875,
"learning_rate": 0.0002,
"loss": 0.3619,
"step": 60
},
{
"epoch": 0.03076584990660367,
"grad_norm": 19.47846794128418,
"learning_rate": 0.0002,
"loss": 0.4093,
"step": 70
},
{
"epoch": 0.03516097132183277,
"grad_norm": 7.209693908691406,
"learning_rate": 0.0002,
"loss": 0.3493,
"step": 80
},
{
"epoch": 0.039556092737061864,
"grad_norm": 3.2900302410125732,
"learning_rate": 0.0002,
"loss": 0.595,
"step": 90
},
{
"epoch": 0.04395121415229096,
"grad_norm": 7.369869709014893,
"learning_rate": 0.0002,
"loss": 0.4487,
"step": 100
},
{
"epoch": 0.04395121415229096,
"eval_loss": 0.0811665803194046,
"eval_runtime": 52.465,
"eval_samples_per_second": 18.279,
"eval_steps_per_second": 9.149,
"step": 100
},
{
"epoch": 0.048346335567520056,
"grad_norm": 3.1896722316741943,
"learning_rate": 0.0002,
"loss": 0.3859,
"step": 110
},
{
"epoch": 0.052741456982749145,
"grad_norm": 1.2178314924240112,
"learning_rate": 0.0002,
"loss": 0.401,
"step": 120
},
{
"epoch": 0.05713657839797824,
"grad_norm": 6.184231758117676,
"learning_rate": 0.0002,
"loss": 0.4058,
"step": 130
},
{
"epoch": 0.06153169981320734,
"grad_norm": 6.1389899253845215,
"learning_rate": 0.0002,
"loss": 0.3071,
"step": 140
},
{
"epoch": 0.06592682122843643,
"grad_norm": 4.514057159423828,
"learning_rate": 0.0002,
"loss": 0.2695,
"step": 150
},
{
"epoch": 0.06592682122843643,
"eval_loss": 0.051861684769392014,
"eval_runtime": 52.5077,
"eval_samples_per_second": 18.264,
"eval_steps_per_second": 9.142,
"step": 150
},
{
"epoch": 0.07032194264366554,
"grad_norm": 4.898068904876709,
"learning_rate": 0.0002,
"loss": 0.2806,
"step": 160
},
{
"epoch": 0.07471706405889462,
"grad_norm": 4.877391338348389,
"learning_rate": 0.0002,
"loss": 0.3743,
"step": 170
},
{
"epoch": 0.07911218547412373,
"grad_norm": 5.850367546081543,
"learning_rate": 0.0002,
"loss": 0.1653,
"step": 180
},
{
"epoch": 0.08350730688935282,
"grad_norm": 5.329926013946533,
"learning_rate": 0.0002,
"loss": 0.3935,
"step": 190
},
{
"epoch": 0.08790242830458192,
"grad_norm": 10.51096248626709,
"learning_rate": 0.0002,
"loss": 0.2814,
"step": 200
},
{
"epoch": 0.08790242830458192,
"eval_loss": 0.14480413496494293,
"eval_runtime": 52.5156,
"eval_samples_per_second": 18.261,
"eval_steps_per_second": 9.14,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.216069447634125e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}