{ "best_metric": 1.9373520612716675, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.010051766597979595, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002010353319595919, "grad_norm": 0.7243201732635498, "learning_rate": 1e-05, "loss": 1.9001, "step": 1 }, { "epoch": 0.0002010353319595919, "eval_loss": 2.11344051361084, "eval_runtime": 171.9074, "eval_samples_per_second": 48.736, "eval_steps_per_second": 12.187, "step": 1 }, { "epoch": 0.0004020706639191838, "grad_norm": 0.9003947377204895, "learning_rate": 2e-05, "loss": 1.8396, "step": 2 }, { "epoch": 0.0006031059958787757, "grad_norm": 0.9272552728652954, "learning_rate": 3e-05, "loss": 1.8755, "step": 3 }, { "epoch": 0.0008041413278383676, "grad_norm": 1.0478748083114624, "learning_rate": 4e-05, "loss": 1.7778, "step": 4 }, { "epoch": 0.0010051766597979595, "grad_norm": 0.9338437914848328, "learning_rate": 5e-05, "loss": 1.8533, "step": 5 }, { "epoch": 0.0012062119917575513, "grad_norm": 0.9951044321060181, "learning_rate": 6e-05, "loss": 1.8048, "step": 6 }, { "epoch": 0.0014072473237171433, "grad_norm": 0.8657851219177246, "learning_rate": 7e-05, "loss": 1.8549, "step": 7 }, { "epoch": 0.0016082826556767353, "grad_norm": 0.7345765233039856, "learning_rate": 8e-05, "loss": 1.8697, "step": 8 }, { "epoch": 0.001809317987636327, "grad_norm": 0.9524029493331909, "learning_rate": 9e-05, "loss": 1.8195, "step": 9 }, { "epoch": 0.002010353319595919, "grad_norm": 0.9118521809577942, "learning_rate": 0.0001, "loss": 1.8523, "step": 10 }, { "epoch": 0.002211388651555511, "grad_norm": 0.8495756387710571, "learning_rate": 9.999316524962345e-05, "loss": 1.816, "step": 11 }, { "epoch": 0.0024124239835151026, "grad_norm": 0.7486765384674072, "learning_rate": 9.997266286704631e-05, "loss": 1.7435, "step": 12 }, { "epoch": 0.002613459315474695, "grad_norm": 0.871289849281311, "learning_rate": 9.993849845741524e-05, "loss": 1.8233, "step": 13 }, { "epoch": 0.0028144946474342866, "grad_norm": 0.7088490724563599, "learning_rate": 9.989068136093873e-05, "loss": 1.7216, "step": 14 }, { "epoch": 0.0030155299793938784, "grad_norm": 0.8408116698265076, "learning_rate": 9.98292246503335e-05, "loss": 1.8027, "step": 15 }, { "epoch": 0.0032165653113534706, "grad_norm": 0.7949049472808838, "learning_rate": 9.975414512725057e-05, "loss": 1.835, "step": 16 }, { "epoch": 0.0034176006433130623, "grad_norm": 0.9442782402038574, "learning_rate": 9.966546331768191e-05, "loss": 1.9566, "step": 17 }, { "epoch": 0.003618635975272654, "grad_norm": 0.7910245656967163, "learning_rate": 9.956320346634876e-05, "loss": 1.7318, "step": 18 }, { "epoch": 0.003819671307232246, "grad_norm": 0.8084377646446228, "learning_rate": 9.944739353007344e-05, "loss": 1.7165, "step": 19 }, { "epoch": 0.004020706639191838, "grad_norm": 0.8121070861816406, "learning_rate": 9.931806517013612e-05, "loss": 1.9654, "step": 20 }, { "epoch": 0.0042217419711514294, "grad_norm": 0.9022719264030457, "learning_rate": 9.917525374361912e-05, "loss": 1.8507, "step": 21 }, { "epoch": 0.004422777303111022, "grad_norm": 0.7618189454078674, "learning_rate": 9.901899829374047e-05, "loss": 1.7183, "step": 22 }, { "epoch": 0.004623812635070614, "grad_norm": 0.6915072202682495, "learning_rate": 9.884934153917997e-05, "loss": 1.8579, "step": 23 }, { "epoch": 0.004824847967030205, "grad_norm": 0.7315471768379211, "learning_rate": 9.86663298624003e-05, "loss": 1.8386, "step": 24 }, { 
"epoch": 0.005025883298989797, "grad_norm": 0.7275452017784119, "learning_rate": 9.847001329696653e-05, "loss": 1.772, "step": 25 }, { "epoch": 0.00522691863094939, "grad_norm": 0.7768784761428833, "learning_rate": 9.826044551386744e-05, "loss": 1.7954, "step": 26 }, { "epoch": 0.005427953962908981, "grad_norm": 0.8576830625534058, "learning_rate": 9.803768380684242e-05, "loss": 1.8411, "step": 27 }, { "epoch": 0.005628989294868573, "grad_norm": 0.7678402066230774, "learning_rate": 9.780178907671789e-05, "loss": 1.769, "step": 28 }, { "epoch": 0.005830024626828165, "grad_norm": 1.0145021677017212, "learning_rate": 9.755282581475769e-05, "loss": 2.0439, "step": 29 }, { "epoch": 0.006031059958787757, "grad_norm": 0.9616433382034302, "learning_rate": 9.729086208503174e-05, "loss": 1.8919, "step": 30 }, { "epoch": 0.006232095290747349, "grad_norm": 0.7848714590072632, "learning_rate": 9.701596950580806e-05, "loss": 1.8079, "step": 31 }, { "epoch": 0.006433130622706941, "grad_norm": 0.8140174150466919, "learning_rate": 9.672822322997305e-05, "loss": 1.7669, "step": 32 }, { "epoch": 0.0066341659546665325, "grad_norm": 0.890622615814209, "learning_rate": 9.642770192448536e-05, "loss": 1.6712, "step": 33 }, { "epoch": 0.006835201286626125, "grad_norm": 0.872776210308075, "learning_rate": 9.611448774886924e-05, "loss": 1.8183, "step": 34 }, { "epoch": 0.007036236618585716, "grad_norm": 0.9921485781669617, "learning_rate": 9.578866633275288e-05, "loss": 1.8694, "step": 35 }, { "epoch": 0.007237271950545308, "grad_norm": 1.0803183317184448, "learning_rate": 9.545032675245813e-05, "loss": 1.9187, "step": 36 }, { "epoch": 0.0074383072825049005, "grad_norm": 0.9387599229812622, "learning_rate": 9.509956150664796e-05, "loss": 1.9963, "step": 37 }, { "epoch": 0.007639342614464492, "grad_norm": 1.0187156200408936, "learning_rate": 9.473646649103818e-05, "loss": 1.7119, "step": 38 }, { "epoch": 0.007840377946424084, "grad_norm": 1.048580527305603, "learning_rate": 9.43611409721806e-05, "loss": 1.7683, "step": 39 }, { "epoch": 0.008041413278383676, "grad_norm": 0.9831770658493042, "learning_rate": 9.397368756032445e-05, "loss": 1.7932, "step": 40 }, { "epoch": 0.008242448610343268, "grad_norm": 1.0285784006118774, "learning_rate": 9.357421218136386e-05, "loss": 1.8248, "step": 41 }, { "epoch": 0.008443483942302859, "grad_norm": 1.1290544271469116, "learning_rate": 9.316282404787871e-05, "loss": 1.9735, "step": 42 }, { "epoch": 0.008644519274262451, "grad_norm": 1.0321779251098633, "learning_rate": 9.273963562927695e-05, "loss": 1.783, "step": 43 }, { "epoch": 0.008845554606222043, "grad_norm": 1.100907802581787, "learning_rate": 9.230476262104677e-05, "loss": 1.7801, "step": 44 }, { "epoch": 0.009046589938181636, "grad_norm": 1.203137993812561, "learning_rate": 9.185832391312644e-05, "loss": 1.9669, "step": 45 }, { "epoch": 0.009247625270141228, "grad_norm": 1.23589289188385, "learning_rate": 9.140044155740101e-05, "loss": 1.6836, "step": 46 }, { "epoch": 0.00944866060210082, "grad_norm": 1.3507293462753296, "learning_rate": 9.093124073433463e-05, "loss": 1.6699, "step": 47 }, { "epoch": 0.00964969593406041, "grad_norm": 1.4106907844543457, "learning_rate": 9.045084971874738e-05, "loss": 1.7651, "step": 48 }, { "epoch": 0.009850731266020003, "grad_norm": 1.957142949104309, "learning_rate": 8.995939984474624e-05, "loss": 1.8771, "step": 49 }, { "epoch": 0.010051766597979595, "grad_norm": 2.773338556289673, "learning_rate": 8.945702546981969e-05, "loss": 2.0025, "step": 50 }, { "epoch": 0.010051766597979595, 
"eval_loss": 1.9373520612716675, "eval_runtime": 172.4358, "eval_samples_per_second": 48.586, "eval_steps_per_second": 12.149, "step": 50 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0825725351297024e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }