global_step,perplexity,eval_loss,train_loss 10000,tensor(2.1992),0.7880828963867849,0.8737165927886963 20000,tensor(2.1610),0.7705507040390662,0.8072351813316345 30000,tensor(2.1334),0.7577285939474578,0.7155318260192871 34763,tensor(2.1201),0.7514673543713649,0.8827775716781616 40000,tensor(2.1026),0.7431845677856856,0.7359579801559448 50000,tensor(2.0772),0.731001776631241,0.6598162651062012 60000,tensor(2.0668),0.726000594505081,0.7386130690574646 69526,tensor(2.0506),0.7181160413628266,0.8968920707702637 70000,tensor(2.0418),0.7138193785487026,0.706092894077301 80000,tensor(2.0300),0.7080411803251254,0.7196658849716187 90000,tensor(2.0187),0.7024395720692894,0.8007305264472961 100000,tensor(2.0050),0.695622627103864,0.6531331539154053 104289,tensor(2.0049),0.6956184461403945,0.8391630053520203 110000,tensor(2.0087),0.6975072758021955,0.8131434917449951 120000,tensor(1.9993),0.6927953025623087,0.7214128375053406 130000,tensor(2.0056),0.6959200249105013,0.718044638633728 139052,tensor(2.0068),0.6965351469204804,0.7522366046905518