error577 commited on
Commit
7112d80
·
verified ·
1 Parent(s): ef7144f

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:024de3f8e2afb4251670fd08dca4001b5a22b5aa81ae8963df0898d4eaf258bf
3
  size 578859568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0c195b03a78891addfd541c6f5a6e05e308cb677dddc2b5d1e1bc7a4317910
3
  size 578859568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbc5bb928545579b4a25c09020cb0ddd259af2200a0a70f135813471914ab7ec
3
  size 294324692
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea03775d6f122ef22f9a34fa6d7c975927ce0c5091946eb8c4d70964cfe011e
3
  size 294324692
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:404ce932a6e24aba5cc2e7fbd9a324e1334ed0f859ca94dba9e1b8e1bea61d54
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a000bcd0fcfbd6dc706ee094bc40e59bcb50a28e8797f55a0743ef881fecdf71
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fe5dbedd3d0105f98d40d84fbe544af591501f8969d82c59cef4d7bb5f81712
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9443e18e9eff1c8055981c18d9a28ff4f85044c4c7fdc07a0fbff8845c622c60
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7006093859672546,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.011118755255349163,
5
  "eval_steps": 50,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2879,6 +2879,364 @@
2879
  "eval_samples_per_second": 11.541,
2880
  "eval_steps_per_second": 5.78,
2881
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2882
  }
2883
  ],
2884
  "logging_steps": 1,
@@ -2907,7 +3265,7 @@
2907
  "attributes": {}
2908
  }
2909
  },
2910
- "total_flos": 2.5658179794763776e+16,
2911
  "train_batch_size": 2,
2912
  "trial_name": null,
2913
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6939424276351929,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 0.012508599662267809,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2879
  "eval_samples_per_second": 11.541,
2880
  "eval_steps_per_second": 5.78,
2881
  "step": 400
2882
+ },
2883
+ {
2884
+ "epoch": 0.011146552143487537,
2885
+ "grad_norm": 0.09276453405618668,
2886
+ "learning_rate": 0.00019476814191464386,
2887
+ "loss": 0.8983,
2888
+ "step": 401
2889
+ },
2890
+ {
2891
+ "epoch": 0.01117434903162591,
2892
+ "grad_norm": 0.08963775634765625,
2893
+ "learning_rate": 0.00019098300562505265,
2894
+ "loss": 0.8343,
2895
+ "step": 402
2896
+ },
2897
+ {
2898
+ "epoch": 0.011202145919764282,
2899
+ "grad_norm": 0.09050919860601425,
2900
+ "learning_rate": 0.0001872311248553974,
2901
+ "loss": 0.7674,
2902
+ "step": 403
2903
+ },
2904
+ {
2905
+ "epoch": 0.011229942807902656,
2906
+ "grad_norm": 0.1092870682477951,
2907
+ "learning_rate": 0.00018351265383080128,
2908
+ "loss": 1.0618,
2909
+ "step": 404
2910
+ },
2911
+ {
2912
+ "epoch": 0.011257739696041028,
2913
+ "grad_norm": 0.08485256880521774,
2914
+ "learning_rate": 0.00017982774540304403,
2915
+ "loss": 0.7261,
2916
+ "step": 405
2917
+ },
2918
+ {
2919
+ "epoch": 0.011285536584179402,
2920
+ "grad_norm": 0.08782031387090683,
2921
+ "learning_rate": 0.00017617655104427832,
2922
+ "loss": 0.7258,
2923
+ "step": 406
2924
+ },
2925
+ {
2926
+ "epoch": 0.011313333472317774,
2927
+ "grad_norm": 0.0893518328666687,
2928
+ "learning_rate": 0.00017255922084080368,
2929
+ "loss": 0.8466,
2930
+ "step": 407
2931
+ },
2932
+ {
2933
+ "epoch": 0.011341130360456147,
2934
+ "grad_norm": 0.07837007939815521,
2935
+ "learning_rate": 0.00016897590348689606,
2936
+ "loss": 0.6156,
2937
+ "step": 408
2938
+ },
2939
+ {
2940
+ "epoch": 0.011368927248594519,
2941
+ "grad_norm": 0.09375711530447006,
2942
+ "learning_rate": 0.00016542674627869735,
2943
+ "loss": 0.7362,
2944
+ "step": 409
2945
+ },
2946
+ {
2947
+ "epoch": 0.011396724136732893,
2948
+ "grad_norm": 0.08803148567676544,
2949
+ "learning_rate": 0.0001619118951081594,
2950
+ "loss": 0.8826,
2951
+ "step": 410
2952
+ },
2953
+ {
2954
+ "epoch": 0.011424521024871266,
2955
+ "grad_norm": 0.09359045326709747,
2956
+ "learning_rate": 0.00015843149445704684,
2957
+ "loss": 0.7686,
2958
+ "step": 411
2959
+ },
2960
+ {
2961
+ "epoch": 0.011452317913009638,
2962
+ "grad_norm": 0.09178245067596436,
2963
+ "learning_rate": 0.00015498568739099906,
2964
+ "loss": 0.7662,
2965
+ "step": 412
2966
+ },
2967
+ {
2968
+ "epoch": 0.011480114801148012,
2969
+ "grad_norm": 0.0961398333311081,
2970
+ "learning_rate": 0.0001515746155536477,
2971
+ "loss": 0.8347,
2972
+ "step": 413
2973
+ },
2974
+ {
2975
+ "epoch": 0.011507911689286384,
2976
+ "grad_norm": 0.1026514321565628,
2977
+ "learning_rate": 0.0001481984191607959,
2978
+ "loss": 0.8207,
2979
+ "step": 414
2980
+ },
2981
+ {
2982
+ "epoch": 0.011535708577424758,
2983
+ "grad_norm": 0.08573547005653381,
2984
+ "learning_rate": 0.0001448572369946539,
2985
+ "loss": 0.6231,
2986
+ "step": 415
2987
+ },
2988
+ {
2989
+ "epoch": 0.01156350546556313,
2990
+ "grad_norm": 0.09467485547065735,
2991
+ "learning_rate": 0.0001415512063981339,
2992
+ "loss": 0.9214,
2993
+ "step": 416
2994
+ },
2995
+ {
2996
+ "epoch": 0.011591302353701503,
2997
+ "grad_norm": 0.0945618599653244,
2998
+ "learning_rate": 0.00013828046326920496,
2999
+ "loss": 0.749,
3000
+ "step": 417
3001
+ },
3002
+ {
3003
+ "epoch": 0.011619099241839875,
3004
+ "grad_norm": 0.10449232906103134,
3005
+ "learning_rate": 0.0001350451420553065,
3006
+ "loss": 1.0501,
3007
+ "step": 418
3008
+ },
3009
+ {
3010
+ "epoch": 0.011646896129978249,
3011
+ "grad_norm": 0.09804502129554749,
3012
+ "learning_rate": 0.0001318453757478215,
3013
+ "loss": 0.6405,
3014
+ "step": 419
3015
+ },
3016
+ {
3017
+ "epoch": 0.011674693018116623,
3018
+ "grad_norm": 0.08781873434782028,
3019
+ "learning_rate": 0.0001286812958766106,
3020
+ "loss": 0.7123,
3021
+ "step": 420
3022
+ },
3023
+ {
3024
+ "epoch": 0.011702489906254995,
3025
+ "grad_norm": 0.09648067504167557,
3026
+ "learning_rate": 0.00012555303250460438,
3027
+ "loss": 0.8559,
3028
+ "step": 421
3029
+ },
3030
+ {
3031
+ "epoch": 0.011730286794393368,
3032
+ "grad_norm": 0.09019096195697784,
3033
+ "learning_rate": 0.00012246071422245718,
3034
+ "loss": 0.761,
3035
+ "step": 422
3036
+ },
3037
+ {
3038
+ "epoch": 0.01175808368253174,
3039
+ "grad_norm": 0.09508346021175385,
3040
+ "learning_rate": 0.000119404468143262,
3041
+ "loss": 0.7989,
3042
+ "step": 423
3043
+ },
3044
+ {
3045
+ "epoch": 0.011785880570670114,
3046
+ "grad_norm": 0.08918111771345139,
3047
+ "learning_rate": 0.00011638441989732473,
3048
+ "loss": 0.6767,
3049
+ "step": 424
3050
+ },
3051
+ {
3052
+ "epoch": 0.011813677458808486,
3053
+ "grad_norm": 0.09687741100788116,
3054
+ "learning_rate": 0.00011340069362699989,
3055
+ "loss": 0.7161,
3056
+ "step": 425
3057
+ },
3058
+ {
3059
+ "epoch": 0.01184147434694686,
3060
+ "grad_norm": 0.10025037080049515,
3061
+ "learning_rate": 0.00011045341198158831,
3062
+ "loss": 0.6706,
3063
+ "step": 426
3064
+ },
3065
+ {
3066
+ "epoch": 0.011869271235085231,
3067
+ "grad_norm": 0.09664606302976608,
3068
+ "learning_rate": 0.00010754269611229428,
3069
+ "loss": 0.6177,
3070
+ "step": 427
3071
+ },
3072
+ {
3073
+ "epoch": 0.011897068123223605,
3074
+ "grad_norm": 0.09703200310468674,
3075
+ "learning_rate": 0.00010466866566724697,
3076
+ "loss": 0.6235,
3077
+ "step": 428
3078
+ },
3079
+ {
3080
+ "epoch": 0.011924865011361979,
3081
+ "grad_norm": 0.09958402812480927,
3082
+ "learning_rate": 0.00010183143878658097,
3083
+ "loss": 0.6999,
3084
+ "step": 429
3085
+ },
3086
+ {
3087
+ "epoch": 0.01195266189950035,
3088
+ "grad_norm": 0.0950227677822113,
3089
+ "learning_rate": 9.903113209758097e-05,
3090
+ "loss": 0.6983,
3091
+ "step": 430
3092
+ },
3093
+ {
3094
+ "epoch": 0.011980458787638724,
3095
+ "grad_norm": 0.09548084437847137,
3096
+ "learning_rate": 9.626786070988657e-05,
3097
+ "loss": 0.609,
3098
+ "step": 431
3099
+ },
3100
+ {
3101
+ "epoch": 0.012008255675777096,
3102
+ "grad_norm": 0.0920906737446785,
3103
+ "learning_rate": 9.354173821076184e-05,
3104
+ "loss": 0.6281,
3105
+ "step": 432
3106
+ },
3107
+ {
3108
+ "epoch": 0.01203605256391547,
3109
+ "grad_norm": 0.09439770877361298,
3110
+ "learning_rate": 9.085287666042507e-05,
3111
+ "loss": 0.6777,
3112
+ "step": 433
3113
+ },
3114
+ {
3115
+ "epoch": 0.012063849452053842,
3116
+ "grad_norm": 0.08835854381322861,
3117
+ "learning_rate": 8.820138658744304e-05,
3118
+ "loss": 0.4624,
3119
+ "step": 434
3120
+ },
3121
+ {
3122
+ "epoch": 0.012091646340192216,
3123
+ "grad_norm": 0.09508516639471054,
3124
+ "learning_rate": 8.558737698418762e-05,
3125
+ "loss": 0.5111,
3126
+ "step": 435
3127
+ },
3128
+ {
3129
+ "epoch": 0.012119443228330588,
3130
+ "grad_norm": 0.10829413682222366,
3131
+ "learning_rate": 8.301095530235491e-05,
3132
+ "loss": 0.9261,
3133
+ "step": 436
3134
+ },
3135
+ {
3136
+ "epoch": 0.012147240116468961,
3137
+ "grad_norm": 0.11397778987884521,
3138
+ "learning_rate": 8.047222744854943e-05,
3139
+ "loss": 0.6988,
3140
+ "step": 437
3141
+ },
3142
+ {
3143
+ "epoch": 0.012175037004607335,
3144
+ "grad_norm": 0.12012533843517303,
3145
+ "learning_rate": 7.79712977799295e-05,
3146
+ "loss": 0.6691,
3147
+ "step": 438
3148
+ },
3149
+ {
3150
+ "epoch": 0.012202833892745707,
3151
+ "grad_norm": 0.12243448197841644,
3152
+ "learning_rate": 7.550826909991859e-05,
3153
+ "loss": 0.649,
3154
+ "step": 439
3155
+ },
3156
+ {
3157
+ "epoch": 0.01223063078088408,
3158
+ "grad_norm": 0.10324777662754059,
3159
+ "learning_rate": 7.308324265397836e-05,
3160
+ "loss": 0.5844,
3161
+ "step": 440
3162
+ },
3163
+ {
3164
+ "epoch": 0.012258427669022453,
3165
+ "grad_norm": 0.10553352534770966,
3166
+ "learning_rate": 7.069631812544808e-05,
3167
+ "loss": 0.4693,
3168
+ "step": 441
3169
+ },
3170
+ {
3171
+ "epoch": 0.012286224557160826,
3172
+ "grad_norm": 0.11548332124948502,
3173
+ "learning_rate": 6.834759363144594e-05,
3174
+ "loss": 0.6917,
3175
+ "step": 442
3176
+ },
3177
+ {
3178
+ "epoch": 0.012314021445299198,
3179
+ "grad_norm": 0.12441360205411911,
3180
+ "learning_rate": 6.603716571883689e-05,
3181
+ "loss": 0.7703,
3182
+ "step": 443
3183
+ },
3184
+ {
3185
+ "epoch": 0.012341818333437572,
3186
+ "grad_norm": 0.12251102924346924,
3187
+ "learning_rate": 6.37651293602628e-05,
3188
+ "loss": 0.6202,
3189
+ "step": 444
3190
+ },
3191
+ {
3192
+ "epoch": 0.012369615221575944,
3193
+ "grad_norm": 0.13246478140354156,
3194
+ "learning_rate": 6.153157795023956e-05,
3195
+ "loss": 0.6897,
3196
+ "step": 445
3197
+ },
3198
+ {
3199
+ "epoch": 0.012397412109714318,
3200
+ "grad_norm": 0.12585890293121338,
3201
+ "learning_rate": 5.9336603301317516e-05,
3202
+ "loss": 0.5828,
3203
+ "step": 446
3204
+ },
3205
+ {
3206
+ "epoch": 0.012425208997852691,
3207
+ "grad_norm": 0.1446872502565384,
3208
+ "learning_rate": 5.718029564030702e-05,
3209
+ "loss": 0.5442,
3210
+ "step": 447
3211
+ },
3212
+ {
3213
+ "epoch": 0.012453005885991063,
3214
+ "grad_norm": 0.14832744002342224,
3215
+ "learning_rate": 5.5062743604570865e-05,
3216
+ "loss": 0.5683,
3217
+ "step": 448
3218
+ },
3219
+ {
3220
+ "epoch": 0.012480802774129437,
3221
+ "grad_norm": 0.1458773910999298,
3222
+ "learning_rate": 5.298403423837883e-05,
3223
+ "loss": 0.4833,
3224
+ "step": 449
3225
+ },
3226
+ {
3227
+ "epoch": 0.012508599662267809,
3228
+ "grad_norm": 0.19131259620189667,
3229
+ "learning_rate": 5.094425298933136e-05,
3230
+ "loss": 0.507,
3231
+ "step": 450
3232
+ },
3233
+ {
3234
+ "epoch": 0.012508599662267809,
3235
+ "eval_loss": 0.6939424276351929,
3236
+ "eval_runtime": 49.9667,
3237
+ "eval_samples_per_second": 11.548,
3238
+ "eval_steps_per_second": 5.784,
3239
+ "step": 450
3240
  }
3241
  ],
3242
  "logging_steps": 1,
 
3265
  "attributes": {}
3266
  }
3267
  },
3268
+ "total_flos": 2.879669611266048e+16,
3269
  "train_batch_size": 2,
3270
  "trial_name": null,
3271
  "trial_params": null