{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9879518072289155, "eval_steps": 500, "global_step": 186, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1606425702811245, "grad_norm": 0.1789746880531311, "learning_rate": 0.0002, "loss": 0.9249, "step": 10 }, { "epoch": 0.321285140562249, "grad_norm": 0.143571138381958, "learning_rate": 0.0002, "loss": 0.6491, "step": 20 }, { "epoch": 0.4819277108433735, "grad_norm": 0.12150721997022629, "learning_rate": 0.0002, "loss": 0.5985, "step": 30 }, { "epoch": 0.642570281124498, "grad_norm": 0.10915827751159668, "learning_rate": 0.0002, "loss": 0.644, "step": 40 }, { "epoch": 0.8032128514056225, "grad_norm": 0.10958955436944962, "learning_rate": 0.0002, "loss": 0.5759, "step": 50 }, { "epoch": 0.963855421686747, "grad_norm": 0.08871103078126907, "learning_rate": 0.0002, "loss": 0.5609, "step": 60 }, { "epoch": 1.1244979919678715, "grad_norm": 0.08688271045684814, "learning_rate": 0.0002, "loss": 0.5088, "step": 70 }, { "epoch": 1.285140562248996, "grad_norm": 0.088297538459301, "learning_rate": 0.0002, "loss": 0.4951, "step": 80 }, { "epoch": 1.4457831325301205, "grad_norm": 0.09266702085733414, "learning_rate": 0.0002, "loss": 0.4808, "step": 90 }, { "epoch": 1.606425702811245, "grad_norm": 0.08945482224225998, "learning_rate": 0.0002, "loss": 0.4818, "step": 100 }, { "epoch": 1.7670682730923695, "grad_norm": 0.08632655441761017, "learning_rate": 0.0002, "loss": 0.4836, "step": 110 }, { "epoch": 1.927710843373494, "grad_norm": 0.09446747601032257, "learning_rate": 0.0002, "loss": 0.4791, "step": 120 }, { "epoch": 2.0883534136546187, "grad_norm": 0.11169935017824173, "learning_rate": 0.0002, "loss": 0.4373, "step": 130 }, { "epoch": 2.248995983935743, "grad_norm": 0.11277974396944046, "learning_rate": 0.0002, "loss": 0.3987, "step": 140 }, { "epoch": 2.4096385542168672, "grad_norm": 0.11896611750125885, "learning_rate": 0.0002, "loss": 0.4026, "step": 150 }, { "epoch": 2.570281124497992, "grad_norm": 0.11799568682909012, "learning_rate": 0.0002, "loss": 0.4035, "step": 160 }, { "epoch": 2.7309236947791167, "grad_norm": 0.10658109933137894, "learning_rate": 0.0002, "loss": 0.4003, "step": 170 }, { "epoch": 2.891566265060241, "grad_norm": 0.12032566964626312, "learning_rate": 0.0002, "loss": 0.4095, "step": 180 } ], "logging_steps": 10, "max_steps": 186, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.4949482493483418e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }