mamung's picture
Training in progress, step 100, checkpoint
690439d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.847457627118644,
"eval_steps": 9,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00847457627118644,
"eval_loss": 1.2892621755599976,
"eval_runtime": 23.2041,
"eval_samples_per_second": 18.1,
"eval_steps_per_second": 2.284,
"step": 1
},
{
"epoch": 0.0423728813559322,
"grad_norm": 0.8503715395927429,
"learning_rate": 3.75e-05,
"loss": 1.2136,
"step": 5
},
{
"epoch": 0.07627118644067797,
"eval_loss": 1.1160207986831665,
"eval_runtime": 22.2287,
"eval_samples_per_second": 18.894,
"eval_steps_per_second": 2.384,
"step": 9
},
{
"epoch": 0.0847457627118644,
"grad_norm": 0.8837136626243591,
"learning_rate": 7.5e-05,
"loss": 1.1313,
"step": 10
},
{
"epoch": 0.1271186440677966,
"grad_norm": 0.7536290287971497,
"learning_rate": 0.0001125,
"loss": 1.0435,
"step": 15
},
{
"epoch": 0.15254237288135594,
"eval_loss": 0.9707416296005249,
"eval_runtime": 22.2197,
"eval_samples_per_second": 18.902,
"eval_steps_per_second": 2.385,
"step": 18
},
{
"epoch": 0.1694915254237288,
"grad_norm": 0.8838627338409424,
"learning_rate": 0.00015,
"loss": 0.9562,
"step": 20
},
{
"epoch": 0.211864406779661,
"grad_norm": 0.6448015570640564,
"learning_rate": 0.00014855889603024227,
"loss": 0.884,
"step": 25
},
{
"epoch": 0.2288135593220339,
"eval_loss": 0.9067414999008179,
"eval_runtime": 22.2329,
"eval_samples_per_second": 18.891,
"eval_steps_per_second": 2.384,
"step": 27
},
{
"epoch": 0.2542372881355932,
"grad_norm": 0.6635559797286987,
"learning_rate": 0.0001442909649383465,
"loss": 0.9048,
"step": 30
},
{
"epoch": 0.2966101694915254,
"grad_norm": 0.6447595357894897,
"learning_rate": 0.0001373602209226909,
"loss": 0.8876,
"step": 35
},
{
"epoch": 0.3050847457627119,
"eval_loss": 0.8686102032661438,
"eval_runtime": 22.2429,
"eval_samples_per_second": 18.882,
"eval_steps_per_second": 2.383,
"step": 36
},
{
"epoch": 0.3389830508474576,
"grad_norm": 0.5714731216430664,
"learning_rate": 0.00012803300858899104,
"loss": 0.8314,
"step": 40
},
{
"epoch": 0.3813559322033898,
"grad_norm": 0.5815892219543457,
"learning_rate": 0.00011666776747647015,
"loss": 0.7683,
"step": 45
},
{
"epoch": 0.3813559322033898,
"eval_loss": 0.8386306166648865,
"eval_runtime": 22.2395,
"eval_samples_per_second": 18.885,
"eval_steps_per_second": 2.383,
"step": 45
},
{
"epoch": 0.423728813559322,
"grad_norm": 0.5933341979980469,
"learning_rate": 0.00010370125742738173,
"loss": 0.792,
"step": 50
},
{
"epoch": 0.4576271186440678,
"eval_loss": 0.81903076171875,
"eval_runtime": 22.2328,
"eval_samples_per_second": 18.891,
"eval_steps_per_second": 2.384,
"step": 54
},
{
"epoch": 0.4661016949152542,
"grad_norm": 0.5448825359344482,
"learning_rate": 8.963177415120962e-05,
"loss": 0.7919,
"step": 55
},
{
"epoch": 0.5084745762711864,
"grad_norm": 0.5443424582481384,
"learning_rate": 7.5e-05,
"loss": 0.7779,
"step": 60
},
{
"epoch": 0.5338983050847458,
"eval_loss": 0.799988329410553,
"eval_runtime": 22.2537,
"eval_samples_per_second": 18.873,
"eval_steps_per_second": 2.382,
"step": 63
},
{
"epoch": 0.5508474576271186,
"grad_norm": 0.5052010416984558,
"learning_rate": 6.036822584879038e-05,
"loss": 0.7611,
"step": 65
},
{
"epoch": 0.5932203389830508,
"grad_norm": 0.5269892811775208,
"learning_rate": 4.6298742572618266e-05,
"loss": 0.7769,
"step": 70
},
{
"epoch": 0.6101694915254238,
"eval_loss": 0.7839500904083252,
"eval_runtime": 22.2533,
"eval_samples_per_second": 18.874,
"eval_steps_per_second": 2.382,
"step": 72
},
{
"epoch": 0.635593220338983,
"grad_norm": 0.5438559055328369,
"learning_rate": 3.333223252352985e-05,
"loss": 0.7603,
"step": 75
},
{
"epoch": 0.6779661016949152,
"grad_norm": 0.5549936294555664,
"learning_rate": 2.1966991411008938e-05,
"loss": 0.7573,
"step": 80
},
{
"epoch": 0.6864406779661016,
"eval_loss": 0.7745689153671265,
"eval_runtime": 22.2322,
"eval_samples_per_second": 18.891,
"eval_steps_per_second": 2.384,
"step": 81
},
{
"epoch": 0.7203389830508474,
"grad_norm": 0.5136195421218872,
"learning_rate": 1.2639779077309098e-05,
"loss": 0.77,
"step": 85
},
{
"epoch": 0.7627118644067796,
"grad_norm": 0.5830088257789612,
"learning_rate": 5.709035061653494e-06,
"loss": 0.7518,
"step": 90
},
{
"epoch": 0.7627118644067796,
"eval_loss": 0.7687093019485474,
"eval_runtime": 22.2345,
"eval_samples_per_second": 18.89,
"eval_steps_per_second": 2.384,
"step": 90
},
{
"epoch": 0.8050847457627118,
"grad_norm": 0.5288379192352295,
"learning_rate": 1.4411039697577175e-06,
"loss": 0.7473,
"step": 95
},
{
"epoch": 0.8389830508474576,
"eval_loss": 0.7672185897827148,
"eval_runtime": 22.2757,
"eval_samples_per_second": 18.855,
"eval_steps_per_second": 2.379,
"step": 99
},
{
"epoch": 0.847457627118644,
"grad_norm": 0.5222312211990356,
"learning_rate": 0.0,
"loss": 0.7123,
"step": 100
}
],
"logging_steps": 5,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.549997702161367e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}