{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02321981424148607, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007739938080495357, "eval_loss": 1.6945956945419312, "eval_runtime": 110.0753, "eval_samples_per_second": 4.951, "eval_steps_per_second": 2.48, "step": 1 }, { "epoch": 0.0023219814241486067, "grad_norm": 4.82108736038208, "learning_rate": 6e-05, "loss": 6.8237, "step": 3 }, { "epoch": 0.003869969040247678, "eval_loss": 1.6913092136383057, "eval_runtime": 111.3468, "eval_samples_per_second": 4.895, "eval_steps_per_second": 2.452, "step": 5 }, { "epoch": 0.0046439628482972135, "grad_norm": 1.9441202878952026, "learning_rate": 0.00012, "loss": 6.141, "step": 6 }, { "epoch": 0.00696594427244582, "grad_norm": 1.3660614490509033, "learning_rate": 0.00018, "loss": 5.5771, "step": 9 }, { "epoch": 0.007739938080495356, "eval_loss": 1.6541721820831299, "eval_runtime": 111.3024, "eval_samples_per_second": 4.897, "eval_steps_per_second": 2.453, "step": 10 }, { "epoch": 0.009287925696594427, "grad_norm": 3.83369779586792, "learning_rate": 0.00019510565162951537, "loss": 5.9687, "step": 12 }, { "epoch": 0.011609907120743035, "grad_norm": 1.0999690294265747, "learning_rate": 0.00017071067811865476, "loss": 7.028, "step": 15 }, { "epoch": 0.011609907120743035, "eval_loss": 1.5518581867218018, "eval_runtime": 111.3292, "eval_samples_per_second": 4.895, "eval_steps_per_second": 2.452, "step": 15 }, { "epoch": 0.01393188854489164, "grad_norm": 1.7612863779067993, "learning_rate": 0.00013090169943749476, "loss": 6.8503, "step": 18 }, { "epoch": 0.015479876160990712, "eval_loss": 1.4609495401382446, "eval_runtime": 111.3426, "eval_samples_per_second": 4.895, "eval_steps_per_second": 2.452, "step": 20 }, { "epoch": 0.016253869969040248, "grad_norm": 1.8704389333724976, "learning_rate": 8.435655349597689e-05, "loss": 6.6885, "step": 21 }, { "epoch": 0.018575851393188854, "grad_norm": 1.0107614994049072, "learning_rate": 4.12214747707527e-05, "loss": 5.4309, "step": 24 }, { "epoch": 0.01934984520123839, "eval_loss": 1.4435930252075195, "eval_runtime": 111.2717, "eval_samples_per_second": 4.898, "eval_steps_per_second": 2.453, "step": 25 }, { "epoch": 0.02089783281733746, "grad_norm": 1.209702968597412, "learning_rate": 1.0899347581163221e-05, "loss": 5.7998, "step": 27 }, { "epoch": 0.02321981424148607, "grad_norm": 1.0272233486175537, "learning_rate": 0.0, "loss": 6.2568, "step": 30 }, { "epoch": 0.02321981424148607, "eval_loss": 1.4408866167068481, "eval_runtime": 111.248, "eval_samples_per_second": 4.899, "eval_steps_per_second": 2.454, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2987609806536704e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }