{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.03544776119402985,
"eval_steps": 38,
"global_step": 38,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009328358208955224,
"grad_norm": 2.931856870651245,
"learning_rate": 4e-05,
"loss": 3.1183,
"step": 1
},
{
"epoch": 0.0009328358208955224,
"eval_loss": 0.7989240884780884,
"eval_runtime": 61.068,
"eval_samples_per_second": 7.402,
"eval_steps_per_second": 3.701,
"step": 1
},
{
"epoch": 0.0018656716417910447,
"grad_norm": 3.0857083797454834,
"learning_rate": 8e-05,
"loss": 3.3967,
"step": 2
},
{
"epoch": 0.002798507462686567,
"grad_norm": 2.9495201110839844,
"learning_rate": 0.00012,
"loss": 3.4361,
"step": 3
},
{
"epoch": 0.0037313432835820895,
"grad_norm": 2.4459776878356934,
"learning_rate": 0.00016,
"loss": 3.2275,
"step": 4
},
{
"epoch": 0.0046641791044776115,
"grad_norm": 2.410581588745117,
"learning_rate": 0.0002,
"loss": 3.0069,
"step": 5
},
{
"epoch": 0.005597014925373134,
"grad_norm": 1.99094820022583,
"learning_rate": 0.00024,
"loss": 2.6997,
"step": 6
},
{
"epoch": 0.0065298507462686565,
"grad_norm": 1.850408911705017,
"learning_rate": 0.00028,
"loss": 2.6469,
"step": 7
},
{
"epoch": 0.007462686567164179,
"grad_norm": 2.972104072570801,
"learning_rate": 0.00032,
"loss": 3.351,
"step": 8
},
{
"epoch": 0.008395522388059701,
"grad_norm": 2.8936173915863037,
"learning_rate": 0.00036,
"loss": 2.8041,
"step": 9
},
{
"epoch": 0.009328358208955223,
"grad_norm": 2.354464530944824,
"learning_rate": 0.0004,
"loss": 2.7652,
"step": 10
},
{
"epoch": 0.010261194029850746,
"grad_norm": 2.024070978164673,
"learning_rate": 0.0003999496469885013,
"loss": 2.8786,
"step": 11
},
{
"epoch": 0.011194029850746268,
"grad_norm": 1.8775830268859863,
"learning_rate": 0.00039979861330826294,
"loss": 2.0356,
"step": 12
},
{
"epoch": 0.012126865671641791,
"grad_norm": 1.7130846977233887,
"learning_rate": 0.0003995469750092912,
"loss": 2.6772,
"step": 13
},
{
"epoch": 0.013059701492537313,
"grad_norm": 1.9945520162582397,
"learning_rate": 0.00039919485879904784,
"loss": 2.8959,
"step": 14
},
{
"epoch": 0.013992537313432836,
"grad_norm": 1.6973563432693481,
"learning_rate": 0.00039874244197864856,
"loss": 2.3531,
"step": 15
},
{
"epoch": 0.014925373134328358,
"grad_norm": 1.5936486721038818,
"learning_rate": 0.00039818995235358696,
"loss": 1.9481,
"step": 16
},
{
"epoch": 0.01585820895522388,
"grad_norm": 1.828995943069458,
"learning_rate": 0.00039753766811902755,
"loss": 3.6787,
"step": 17
},
{
"epoch": 0.016791044776119403,
"grad_norm": 1.553421139717102,
"learning_rate": 0.0003967859177197259,
"loss": 2.2696,
"step": 18
},
{
"epoch": 0.017723880597014924,
"grad_norm": 1.2893372774124146,
"learning_rate": 0.00039593507968464716,
"loss": 1.9452,
"step": 19
},
{
"epoch": 0.018656716417910446,
"grad_norm": 1.674631953239441,
"learning_rate": 0.0003949855824363647,
"loss": 2.2916,
"step": 20
},
{
"epoch": 0.01958955223880597,
"grad_norm": 1.4569449424743652,
"learning_rate": 0.0003939379040753374,
"loss": 2.1701,
"step": 21
},
{
"epoch": 0.020522388059701493,
"grad_norm": 1.438828468322754,
"learning_rate": 0.00039279257213917066,
"loss": 2.5621,
"step": 22
},
{
"epoch": 0.021455223880597014,
"grad_norm": 1.3770123720169067,
"learning_rate": 0.0003915501633369861,
"loss": 2.1366,
"step": 23
},
{
"epoch": 0.022388059701492536,
"grad_norm": 1.4712828397750854,
"learning_rate": 0.00039021130325903074,
"loss": 2.319,
"step": 24
},
{
"epoch": 0.02332089552238806,
"grad_norm": 1.465248465538025,
"learning_rate": 0.00038877666606167355,
"loss": 2.3959,
"step": 25
},
{
"epoch": 0.024253731343283583,
"grad_norm": 1.4384740591049194,
"learning_rate": 0.00038724697412794747,
"loss": 2.097,
"step": 26
},
{
"epoch": 0.025186567164179104,
"grad_norm": 1.3539812564849854,
"learning_rate": 0.0003856229977038078,
"loss": 2.2313,
"step": 27
},
{
"epoch": 0.026119402985074626,
"grad_norm": 1.3618801832199097,
"learning_rate": 0.0003839055545102902,
"loss": 2.1054,
"step": 28
},
{
"epoch": 0.027052238805970148,
"grad_norm": 1.422633409500122,
"learning_rate": 0.00038209550933176323,
"loss": 2.276,
"step": 29
},
{
"epoch": 0.027985074626865673,
"grad_norm": 1.4232622385025024,
"learning_rate": 0.0003801937735804838,
"loss": 2.1735,
"step": 30
},
{
"epoch": 0.028917910447761194,
"grad_norm": 1.4555679559707642,
"learning_rate": 0.0003782013048376736,
"loss": 2.2706,
"step": 31
},
{
"epoch": 0.029850746268656716,
"grad_norm": 1.2929563522338867,
"learning_rate": 0.0003761191063713476,
"loss": 1.9037,
"step": 32
},
{
"epoch": 0.030783582089552237,
"grad_norm": 1.2687627077102661,
"learning_rate": 0.0003739482266311391,
"loss": 2.1032,
"step": 33
},
{
"epoch": 0.03171641791044776,
"grad_norm": 1.2993357181549072,
"learning_rate": 0.00037168975872037323,
"loss": 2.0062,
"step": 34
},
{
"epoch": 0.03264925373134328,
"grad_norm": 1.3507018089294434,
"learning_rate": 0.00036934483984565685,
"loss": 2.1522,
"step": 35
},
{
"epoch": 0.033582089552238806,
"grad_norm": 1.4183921813964844,
"learning_rate": 0.00036691465074426054,
"loss": 1.845,
"step": 36
},
{
"epoch": 0.03451492537313433,
"grad_norm": 1.3370906114578247,
"learning_rate": 0.00036440041508958203,
"loss": 1.9448,
"step": 37
},
{
"epoch": 0.03544776119402985,
"grad_norm": 1.4249347448349,
"learning_rate": 0.0003618033988749895,
"loss": 2.4594,
"step": 38
},
{
"epoch": 0.03544776119402985,
"eval_loss": 0.5035107135772705,
"eval_runtime": 60.2427,
"eval_samples_per_second": 7.503,
"eval_steps_per_second": 3.751,
"step": 38
}
],
"logging_steps": 1,
"max_steps": 150,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 38,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4981999911567360.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}