{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.03961867029837811,
"eval_steps": 20,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006603111716396351,
"grad_norm": 300608.875,
"learning_rate": 2.9999999999999997e-05,
"loss": 118.0917,
"step": 1
},
{
"epoch": 0.0006603111716396351,
"eval_loss": 15.74350357055664,
"eval_runtime": 6.9487,
"eval_samples_per_second": 71.236,
"eval_steps_per_second": 71.236,
"step": 1
},
{
"epoch": 0.0013206223432792703,
"grad_norm": 277566.0,
"learning_rate": 5.9999999999999995e-05,
"loss": 120.2507,
"step": 2
},
{
"epoch": 0.0019809335149189055,
"grad_norm": 418405.1875,
"learning_rate": 8.999999999999999e-05,
"loss": 143.5985,
"step": 3
},
{
"epoch": 0.0026412446865585405,
"grad_norm": 222641.09375,
"learning_rate": 0.00011999999999999999,
"loss": 113.3078,
"step": 4
},
{
"epoch": 0.003301555858198176,
"grad_norm": 80659.1640625,
"learning_rate": 0.00015,
"loss": 89.0113,
"step": 5
},
{
"epoch": 0.003961867029837811,
"grad_norm": 174515.8125,
"learning_rate": 0.00017999999999999998,
"loss": 92.8388,
"step": 6
},
{
"epoch": 0.004622178201477446,
"grad_norm": 68650.6640625,
"learning_rate": 0.00020999999999999998,
"loss": 88.8969,
"step": 7
},
{
"epoch": 0.005282489373117081,
"grad_norm": 52668.09765625,
"learning_rate": 0.00023999999999999998,
"loss": 83.2267,
"step": 8
},
{
"epoch": 0.005942800544756717,
"grad_norm": 61407.41796875,
"learning_rate": 0.00027,
"loss": 92.7579,
"step": 9
},
{
"epoch": 0.006603111716396352,
"grad_norm": 37122.61328125,
"learning_rate": 0.0003,
"loss": 84.5321,
"step": 10
},
{
"epoch": 0.007263422888035987,
"grad_norm": 58673.69921875,
"learning_rate": 0.0002999911984174669,
"loss": 93.7724,
"step": 11
},
{
"epoch": 0.007923734059675622,
"grad_norm": 121362.359375,
"learning_rate": 0.0002999647947027726,
"loss": 108.6176,
"step": 12
},
{
"epoch": 0.008584045231315257,
"grad_norm": 233485.53125,
"learning_rate": 0.0002999207919545099,
"loss": 105.0768,
"step": 13
},
{
"epoch": 0.009244356402954892,
"grad_norm": 318686.03125,
"learning_rate": 0.0002998591953365965,
"loss": 99.2428,
"step": 14
},
{
"epoch": 0.009904667574594527,
"grad_norm": 101194.0625,
"learning_rate": 0.00029978001207766854,
"loss": 95.0131,
"step": 15
},
{
"epoch": 0.010564978746234162,
"grad_norm": 83839.953125,
"learning_rate": 0.00029968325147023263,
"loss": 86.1607,
"step": 16
},
{
"epoch": 0.011225289917873797,
"grad_norm": 68342.7109375,
"learning_rate": 0.000299568924869575,
"loss": 85.744,
"step": 17
},
{
"epoch": 0.011885601089513434,
"grad_norm": 232218.078125,
"learning_rate": 0.00029943704569242917,
"loss": 89.0831,
"step": 18
},
{
"epoch": 0.012545912261153069,
"grad_norm": 142226.359375,
"learning_rate": 0.0002992876294154013,
"loss": 87.8575,
"step": 19
},
{
"epoch": 0.013206223432792704,
"grad_norm": 99313.8125,
"learning_rate": 0.00029912069357315393,
"loss": 91.0717,
"step": 20
},
{
"epoch": 0.013206223432792704,
"eval_loss": 15.472484588623047,
"eval_runtime": 6.5506,
"eval_samples_per_second": 75.565,
"eval_steps_per_second": 75.565,
"step": 20
},
{
"epoch": 0.013866534604432339,
"grad_norm": 361644.25,
"learning_rate": 0.00029893625775634835,
"loss": 97.0436,
"step": 21
},
{
"epoch": 0.014526845776071974,
"grad_norm": 1564964.125,
"learning_rate": 0.0002987343436093454,
"loss": 279.3873,
"step": 22
},
{
"epoch": 0.015187156947711609,
"grad_norm": 1539204.875,
"learning_rate": 0.00029851497482766547,
"loss": 668.731,
"step": 23
},
{
"epoch": 0.015847468119351244,
"grad_norm": 2246419.0,
"learning_rate": 0.00029827817715520773,
"loss": 758.2188,
"step": 24
},
{
"epoch": 0.01650777929099088,
"grad_norm": 2776799.5,
"learning_rate": 0.0002980239783812289,
"loss": 546.4688,
"step": 25
},
{
"epoch": 0.017168090462630514,
"grad_norm": 1617533.25,
"learning_rate": 0.0002977524083370822,
"loss": 422.7344,
"step": 26
},
{
"epoch": 0.01782840163427015,
"grad_norm": 450934.75,
"learning_rate": 0.00029746349889271645,
"loss": 339.3945,
"step": 27
},
{
"epoch": 0.018488712805909784,
"grad_norm": 2286499.5,
"learning_rate": 0.0002971572839529358,
"loss": 236.2812,
"step": 28
},
{
"epoch": 0.01914902397754942,
"grad_norm": 3999992.25,
"learning_rate": 0.00029683379945342125,
"loss": 159.8123,
"step": 29
},
{
"epoch": 0.019809335149189054,
"grad_norm": 308621.125,
"learning_rate": 0.000296493083356513,
"loss": 124.3508,
"step": 30
},
{
"epoch": 0.02046964632082869,
"grad_norm": 497120.59375,
"learning_rate": 0.00029613517564675565,
"loss": 138.8941,
"step": 31
},
{
"epoch": 0.021129957492468324,
"grad_norm": 99928.7890625,
"learning_rate": 0.0002957601183262058,
"loss": 151.8223,
"step": 32
},
{
"epoch": 0.02179026866410796,
"grad_norm": 405539.0625,
"learning_rate": 0.000295367955409503,
"loss": 1203.2074,
"step": 33
},
{
"epoch": 0.022450579835747594,
"grad_norm": 463994.21875,
"learning_rate": 0.00029495873291870436,
"loss": 1256.5367,
"step": 34
},
{
"epoch": 0.02311089100738723,
"grad_norm": 953879.5625,
"learning_rate": 0.0002945324988778834,
"loss": 938.6675,
"step": 35
},
{
"epoch": 0.023771202179026868,
"grad_norm": 178221.28125,
"learning_rate": 0.00029408930330749477,
"loss": 356.9532,
"step": 36
},
{
"epoch": 0.0244315133506665,
"grad_norm": 146182.625,
"learning_rate": 0.0002936291982185036,
"loss": 238.3703,
"step": 37
},
{
"epoch": 0.025091824522306138,
"grad_norm": 116065.7421875,
"learning_rate": 0.00029315223760628217,
"loss": 212.9555,
"step": 38
},
{
"epoch": 0.02575213569394577,
"grad_norm": 129193.03125,
"learning_rate": 0.00029265847744427303,
"loss": 227.929,
"step": 39
},
{
"epoch": 0.026412446865585408,
"grad_norm": 152996.75,
"learning_rate": 0.00029214797567742035,
"loss": 220.4361,
"step": 40
},
{
"epoch": 0.026412446865585408,
"eval_loss": 16.9798526763916,
"eval_runtime": 6.5479,
"eval_samples_per_second": 75.597,
"eval_steps_per_second": 75.597,
"step": 40
},
{
"epoch": 0.02707275803722504,
"grad_norm": 166313.5,
"learning_rate": 0.00029162079221537,
"loss": 178.7949,
"step": 41
},
{
"epoch": 0.027733069208864678,
"grad_norm": 135891.28125,
"learning_rate": 0.0002910769889254386,
"loss": 201.0785,
"step": 42
},
{
"epoch": 0.02839338038050431,
"grad_norm": 64060.7890625,
"learning_rate": 0.0002905166296253533,
"loss": 163.5094,
"step": 43
},
{
"epoch": 0.029053691552143948,
"grad_norm": 96362.4921875,
"learning_rate": 0.0002899397800757626,
"loss": 140.6384,
"step": 44
},
{
"epoch": 0.02971400272378358,
"grad_norm": 166254.203125,
"learning_rate": 0.0002893465079725187,
"loss": 139.1684,
"step": 45
},
{
"epoch": 0.030374313895423218,
"grad_norm": 161925.5625,
"learning_rate": 0.0002887368829387333,
"loss": 140.9152,
"step": 46
},
{
"epoch": 0.031034625067062855,
"grad_norm": 637966.3125,
"learning_rate": 0.0002881109765166071,
"loss": 131.3419,
"step": 47
},
{
"epoch": 0.03169493623870249,
"grad_norm": 367775.03125,
"learning_rate": 0.00028746886215903387,
"loss": 155.0525,
"step": 48
},
{
"epoch": 0.032355247410342125,
"grad_norm": 307120.84375,
"learning_rate": 0.00028681061522098047,
"loss": 148.0313,
"step": 49
},
{
"epoch": 0.03301555858198176,
"grad_norm": 164428.203125,
"learning_rate": 0.0002861363129506435,
"loss": 139.0605,
"step": 50
},
{
"epoch": 0.03367586975362139,
"grad_norm": 56655.140625,
"learning_rate": 0.0002854460344803842,
"loss": 105.2498,
"step": 51
},
{
"epoch": 0.03433618092526103,
"grad_norm": 110095.71875,
"learning_rate": 0.00028473986081744163,
"loss": 107.1039,
"step": 52
},
{
"epoch": 0.034996492096900665,
"grad_norm": 84727.8125,
"learning_rate": 0.000284017874834426,
"loss": 114.7597,
"step": 53
},
{
"epoch": 0.0356568032685403,
"grad_norm": 108558.9140625,
"learning_rate": 0.0002832801612595937,
"loss": 131.0451,
"step": 54
},
{
"epoch": 0.03631711444017993,
"grad_norm": 35595.47265625,
"learning_rate": 0.0002825268066669034,
"loss": 135.1516,
"step": 55
},
{
"epoch": 0.03697742561181957,
"grad_norm": 54421.08203125,
"learning_rate": 0.00028175789946585693,
"loss": 116.2731,
"step": 56
},
{
"epoch": 0.037637736783459205,
"grad_norm": 72844.515625,
"learning_rate": 0.0002809735298911234,
"loss": 101.419,
"step": 57
},
{
"epoch": 0.03829804795509884,
"grad_norm": 58473.41015625,
"learning_rate": 0.00028017378999195015,
"loss": 101.8432,
"step": 58
},
{
"epoch": 0.03895835912673848,
"grad_norm": 41094.68359375,
"learning_rate": 0.0002793587736213603,
"loss": 114.5148,
"step": 59
},
{
"epoch": 0.03961867029837811,
"grad_norm": 75345.5234375,
"learning_rate": 0.00027852857642513836,
"loss": 119.3659,
"step": 60
},
{
"epoch": 0.03961867029837811,
"eval_loss": 16.175268173217773,
"eval_runtime": 6.5722,
"eval_samples_per_second": 75.317,
"eval_steps_per_second": 75.317,
"step": 60
}
],
"logging_steps": 1,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 40390914736128.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}