Training in progress, step 450, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 578859568
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b0c195b03a78891addfd541c6f5a6e05e308cb677dddc2b5d1e1bc7a4317910
|
3 |
size 578859568
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 294324692
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ea03775d6f122ef22f9a34fa6d7c975927ce0c5091946eb8c4d70964cfe011e
|
3 |
size 294324692
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a000bcd0fcfbd6dc706ee094bc40e59bcb50a28e8797f55a0743ef881fecdf71
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9443e18e9eff1c8055981c18d9a28ff4f85044c4c7fdc07a0fbff8845c622c60
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2879,6 +2879,364 @@
|
|
2879 |
"eval_samples_per_second": 11.541,
|
2880 |
"eval_steps_per_second": 5.78,
|
2881 |
"step": 400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2882 |
}
|
2883 |
],
|
2884 |
"logging_steps": 1,
|
@@ -2907,7 +3265,7 @@
|
|
2907 |
"attributes": {}
|
2908 |
}
|
2909 |
},
|
2910 |
-
"total_flos": 2.
|
2911 |
"train_batch_size": 2,
|
2912 |
"trial_name": null,
|
2913 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6939424276351929,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-450",
|
4 |
+
"epoch": 0.012508599662267809,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 450,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2879 |
"eval_samples_per_second": 11.541,
|
2880 |
"eval_steps_per_second": 5.78,
|
2881 |
"step": 400
|
2882 |
+
},
|
2883 |
+
{
|
2884 |
+
"epoch": 0.011146552143487537,
|
2885 |
+
"grad_norm": 0.09276453405618668,
|
2886 |
+
"learning_rate": 0.00019476814191464386,
|
2887 |
+
"loss": 0.8983,
|
2888 |
+
"step": 401
|
2889 |
+
},
|
2890 |
+
{
|
2891 |
+
"epoch": 0.01117434903162591,
|
2892 |
+
"grad_norm": 0.08963775634765625,
|
2893 |
+
"learning_rate": 0.00019098300562505265,
|
2894 |
+
"loss": 0.8343,
|
2895 |
+
"step": 402
|
2896 |
+
},
|
2897 |
+
{
|
2898 |
+
"epoch": 0.011202145919764282,
|
2899 |
+
"grad_norm": 0.09050919860601425,
|
2900 |
+
"learning_rate": 0.0001872311248553974,
|
2901 |
+
"loss": 0.7674,
|
2902 |
+
"step": 403
|
2903 |
+
},
|
2904 |
+
{
|
2905 |
+
"epoch": 0.011229942807902656,
|
2906 |
+
"grad_norm": 0.1092870682477951,
|
2907 |
+
"learning_rate": 0.00018351265383080128,
|
2908 |
+
"loss": 1.0618,
|
2909 |
+
"step": 404
|
2910 |
+
},
|
2911 |
+
{
|
2912 |
+
"epoch": 0.011257739696041028,
|
2913 |
+
"grad_norm": 0.08485256880521774,
|
2914 |
+
"learning_rate": 0.00017982774540304403,
|
2915 |
+
"loss": 0.7261,
|
2916 |
+
"step": 405
|
2917 |
+
},
|
2918 |
+
{
|
2919 |
+
"epoch": 0.011285536584179402,
|
2920 |
+
"grad_norm": 0.08782031387090683,
|
2921 |
+
"learning_rate": 0.00017617655104427832,
|
2922 |
+
"loss": 0.7258,
|
2923 |
+
"step": 406
|
2924 |
+
},
|
2925 |
+
{
|
2926 |
+
"epoch": 0.011313333472317774,
|
2927 |
+
"grad_norm": 0.0893518328666687,
|
2928 |
+
"learning_rate": 0.00017255922084080368,
|
2929 |
+
"loss": 0.8466,
|
2930 |
+
"step": 407
|
2931 |
+
},
|
2932 |
+
{
|
2933 |
+
"epoch": 0.011341130360456147,
|
2934 |
+
"grad_norm": 0.07837007939815521,
|
2935 |
+
"learning_rate": 0.00016897590348689606,
|
2936 |
+
"loss": 0.6156,
|
2937 |
+
"step": 408
|
2938 |
+
},
|
2939 |
+
{
|
2940 |
+
"epoch": 0.011368927248594519,
|
2941 |
+
"grad_norm": 0.09375711530447006,
|
2942 |
+
"learning_rate": 0.00016542674627869735,
|
2943 |
+
"loss": 0.7362,
|
2944 |
+
"step": 409
|
2945 |
+
},
|
2946 |
+
{
|
2947 |
+
"epoch": 0.011396724136732893,
|
2948 |
+
"grad_norm": 0.08803148567676544,
|
2949 |
+
"learning_rate": 0.0001619118951081594,
|
2950 |
+
"loss": 0.8826,
|
2951 |
+
"step": 410
|
2952 |
+
},
|
2953 |
+
{
|
2954 |
+
"epoch": 0.011424521024871266,
|
2955 |
+
"grad_norm": 0.09359045326709747,
|
2956 |
+
"learning_rate": 0.00015843149445704684,
|
2957 |
+
"loss": 0.7686,
|
2958 |
+
"step": 411
|
2959 |
+
},
|
2960 |
+
{
|
2961 |
+
"epoch": 0.011452317913009638,
|
2962 |
+
"grad_norm": 0.09178245067596436,
|
2963 |
+
"learning_rate": 0.00015498568739099906,
|
2964 |
+
"loss": 0.7662,
|
2965 |
+
"step": 412
|
2966 |
+
},
|
2967 |
+
{
|
2968 |
+
"epoch": 0.011480114801148012,
|
2969 |
+
"grad_norm": 0.0961398333311081,
|
2970 |
+
"learning_rate": 0.0001515746155536477,
|
2971 |
+
"loss": 0.8347,
|
2972 |
+
"step": 413
|
2973 |
+
},
|
2974 |
+
{
|
2975 |
+
"epoch": 0.011507911689286384,
|
2976 |
+
"grad_norm": 0.1026514321565628,
|
2977 |
+
"learning_rate": 0.0001481984191607959,
|
2978 |
+
"loss": 0.8207,
|
2979 |
+
"step": 414
|
2980 |
+
},
|
2981 |
+
{
|
2982 |
+
"epoch": 0.011535708577424758,
|
2983 |
+
"grad_norm": 0.08573547005653381,
|
2984 |
+
"learning_rate": 0.0001448572369946539,
|
2985 |
+
"loss": 0.6231,
|
2986 |
+
"step": 415
|
2987 |
+
},
|
2988 |
+
{
|
2989 |
+
"epoch": 0.01156350546556313,
|
2990 |
+
"grad_norm": 0.09467485547065735,
|
2991 |
+
"learning_rate": 0.0001415512063981339,
|
2992 |
+
"loss": 0.9214,
|
2993 |
+
"step": 416
|
2994 |
+
},
|
2995 |
+
{
|
2996 |
+
"epoch": 0.011591302353701503,
|
2997 |
+
"grad_norm": 0.0945618599653244,
|
2998 |
+
"learning_rate": 0.00013828046326920496,
|
2999 |
+
"loss": 0.749,
|
3000 |
+
"step": 417
|
3001 |
+
},
|
3002 |
+
{
|
3003 |
+
"epoch": 0.011619099241839875,
|
3004 |
+
"grad_norm": 0.10449232906103134,
|
3005 |
+
"learning_rate": 0.0001350451420553065,
|
3006 |
+
"loss": 1.0501,
|
3007 |
+
"step": 418
|
3008 |
+
},
|
3009 |
+
{
|
3010 |
+
"epoch": 0.011646896129978249,
|
3011 |
+
"grad_norm": 0.09804502129554749,
|
3012 |
+
"learning_rate": 0.0001318453757478215,
|
3013 |
+
"loss": 0.6405,
|
3014 |
+
"step": 419
|
3015 |
+
},
|
3016 |
+
{
|
3017 |
+
"epoch": 0.011674693018116623,
|
3018 |
+
"grad_norm": 0.08781873434782028,
|
3019 |
+
"learning_rate": 0.0001286812958766106,
|
3020 |
+
"loss": 0.7123,
|
3021 |
+
"step": 420
|
3022 |
+
},
|
3023 |
+
{
|
3024 |
+
"epoch": 0.011702489906254995,
|
3025 |
+
"grad_norm": 0.09648067504167557,
|
3026 |
+
"learning_rate": 0.00012555303250460438,
|
3027 |
+
"loss": 0.8559,
|
3028 |
+
"step": 421
|
3029 |
+
},
|
3030 |
+
{
|
3031 |
+
"epoch": 0.011730286794393368,
|
3032 |
+
"grad_norm": 0.09019096195697784,
|
3033 |
+
"learning_rate": 0.00012246071422245718,
|
3034 |
+
"loss": 0.761,
|
3035 |
+
"step": 422
|
3036 |
+
},
|
3037 |
+
{
|
3038 |
+
"epoch": 0.01175808368253174,
|
3039 |
+
"grad_norm": 0.09508346021175385,
|
3040 |
+
"learning_rate": 0.000119404468143262,
|
3041 |
+
"loss": 0.7989,
|
3042 |
+
"step": 423
|
3043 |
+
},
|
3044 |
+
{
|
3045 |
+
"epoch": 0.011785880570670114,
|
3046 |
+
"grad_norm": 0.08918111771345139,
|
3047 |
+
"learning_rate": 0.00011638441989732473,
|
3048 |
+
"loss": 0.6767,
|
3049 |
+
"step": 424
|
3050 |
+
},
|
3051 |
+
{
|
3052 |
+
"epoch": 0.011813677458808486,
|
3053 |
+
"grad_norm": 0.09687741100788116,
|
3054 |
+
"learning_rate": 0.00011340069362699989,
|
3055 |
+
"loss": 0.7161,
|
3056 |
+
"step": 425
|
3057 |
+
},
|
3058 |
+
{
|
3059 |
+
"epoch": 0.01184147434694686,
|
3060 |
+
"grad_norm": 0.10025037080049515,
|
3061 |
+
"learning_rate": 0.00011045341198158831,
|
3062 |
+
"loss": 0.6706,
|
3063 |
+
"step": 426
|
3064 |
+
},
|
3065 |
+
{
|
3066 |
+
"epoch": 0.011869271235085231,
|
3067 |
+
"grad_norm": 0.09664606302976608,
|
3068 |
+
"learning_rate": 0.00010754269611229428,
|
3069 |
+
"loss": 0.6177,
|
3070 |
+
"step": 427
|
3071 |
+
},
|
3072 |
+
{
|
3073 |
+
"epoch": 0.011897068123223605,
|
3074 |
+
"grad_norm": 0.09703200310468674,
|
3075 |
+
"learning_rate": 0.00010466866566724697,
|
3076 |
+
"loss": 0.6235,
|
3077 |
+
"step": 428
|
3078 |
+
},
|
3079 |
+
{
|
3080 |
+
"epoch": 0.011924865011361979,
|
3081 |
+
"grad_norm": 0.09958402812480927,
|
3082 |
+
"learning_rate": 0.00010183143878658097,
|
3083 |
+
"loss": 0.6999,
|
3084 |
+
"step": 429
|
3085 |
+
},
|
3086 |
+
{
|
3087 |
+
"epoch": 0.01195266189950035,
|
3088 |
+
"grad_norm": 0.0950227677822113,
|
3089 |
+
"learning_rate": 9.903113209758097e-05,
|
3090 |
+
"loss": 0.6983,
|
3091 |
+
"step": 430
|
3092 |
+
},
|
3093 |
+
{
|
3094 |
+
"epoch": 0.011980458787638724,
|
3095 |
+
"grad_norm": 0.09548084437847137,
|
3096 |
+
"learning_rate": 9.626786070988657e-05,
|
3097 |
+
"loss": 0.609,
|
3098 |
+
"step": 431
|
3099 |
+
},
|
3100 |
+
{
|
3101 |
+
"epoch": 0.012008255675777096,
|
3102 |
+
"grad_norm": 0.0920906737446785,
|
3103 |
+
"learning_rate": 9.354173821076184e-05,
|
3104 |
+
"loss": 0.6281,
|
3105 |
+
"step": 432
|
3106 |
+
},
|
3107 |
+
{
|
3108 |
+
"epoch": 0.01203605256391547,
|
3109 |
+
"grad_norm": 0.09439770877361298,
|
3110 |
+
"learning_rate": 9.085287666042507e-05,
|
3111 |
+
"loss": 0.6777,
|
3112 |
+
"step": 433
|
3113 |
+
},
|
3114 |
+
{
|
3115 |
+
"epoch": 0.012063849452053842,
|
3116 |
+
"grad_norm": 0.08835854381322861,
|
3117 |
+
"learning_rate": 8.820138658744304e-05,
|
3118 |
+
"loss": 0.4624,
|
3119 |
+
"step": 434
|
3120 |
+
},
|
3121 |
+
{
|
3122 |
+
"epoch": 0.012091646340192216,
|
3123 |
+
"grad_norm": 0.09508516639471054,
|
3124 |
+
"learning_rate": 8.558737698418762e-05,
|
3125 |
+
"loss": 0.5111,
|
3126 |
+
"step": 435
|
3127 |
+
},
|
3128 |
+
{
|
3129 |
+
"epoch": 0.012119443228330588,
|
3130 |
+
"grad_norm": 0.10829413682222366,
|
3131 |
+
"learning_rate": 8.301095530235491e-05,
|
3132 |
+
"loss": 0.9261,
|
3133 |
+
"step": 436
|
3134 |
+
},
|
3135 |
+
{
|
3136 |
+
"epoch": 0.012147240116468961,
|
3137 |
+
"grad_norm": 0.11397778987884521,
|
3138 |
+
"learning_rate": 8.047222744854943e-05,
|
3139 |
+
"loss": 0.6988,
|
3140 |
+
"step": 437
|
3141 |
+
},
|
3142 |
+
{
|
3143 |
+
"epoch": 0.012175037004607335,
|
3144 |
+
"grad_norm": 0.12012533843517303,
|
3145 |
+
"learning_rate": 7.79712977799295e-05,
|
3146 |
+
"loss": 0.6691,
|
3147 |
+
"step": 438
|
3148 |
+
},
|
3149 |
+
{
|
3150 |
+
"epoch": 0.012202833892745707,
|
3151 |
+
"grad_norm": 0.12243448197841644,
|
3152 |
+
"learning_rate": 7.550826909991859e-05,
|
3153 |
+
"loss": 0.649,
|
3154 |
+
"step": 439
|
3155 |
+
},
|
3156 |
+
{
|
3157 |
+
"epoch": 0.01223063078088408,
|
3158 |
+
"grad_norm": 0.10324777662754059,
|
3159 |
+
"learning_rate": 7.308324265397836e-05,
|
3160 |
+
"loss": 0.5844,
|
3161 |
+
"step": 440
|
3162 |
+
},
|
3163 |
+
{
|
3164 |
+
"epoch": 0.012258427669022453,
|
3165 |
+
"grad_norm": 0.10553352534770966,
|
3166 |
+
"learning_rate": 7.069631812544808e-05,
|
3167 |
+
"loss": 0.4693,
|
3168 |
+
"step": 441
|
3169 |
+
},
|
3170 |
+
{
|
3171 |
+
"epoch": 0.012286224557160826,
|
3172 |
+
"grad_norm": 0.11548332124948502,
|
3173 |
+
"learning_rate": 6.834759363144594e-05,
|
3174 |
+
"loss": 0.6917,
|
3175 |
+
"step": 442
|
3176 |
+
},
|
3177 |
+
{
|
3178 |
+
"epoch": 0.012314021445299198,
|
3179 |
+
"grad_norm": 0.12441360205411911,
|
3180 |
+
"learning_rate": 6.603716571883689e-05,
|
3181 |
+
"loss": 0.7703,
|
3182 |
+
"step": 443
|
3183 |
+
},
|
3184 |
+
{
|
3185 |
+
"epoch": 0.012341818333437572,
|
3186 |
+
"grad_norm": 0.12251102924346924,
|
3187 |
+
"learning_rate": 6.37651293602628e-05,
|
3188 |
+
"loss": 0.6202,
|
3189 |
+
"step": 444
|
3190 |
+
},
|
3191 |
+
{
|
3192 |
+
"epoch": 0.012369615221575944,
|
3193 |
+
"grad_norm": 0.13246478140354156,
|
3194 |
+
"learning_rate": 6.153157795023956e-05,
|
3195 |
+
"loss": 0.6897,
|
3196 |
+
"step": 445
|
3197 |
+
},
|
3198 |
+
{
|
3199 |
+
"epoch": 0.012397412109714318,
|
3200 |
+
"grad_norm": 0.12585890293121338,
|
3201 |
+
"learning_rate": 5.9336603301317516e-05,
|
3202 |
+
"loss": 0.5828,
|
3203 |
+
"step": 446
|
3204 |
+
},
|
3205 |
+
{
|
3206 |
+
"epoch": 0.012425208997852691,
|
3207 |
+
"grad_norm": 0.1446872502565384,
|
3208 |
+
"learning_rate": 5.718029564030702e-05,
|
3209 |
+
"loss": 0.5442,
|
3210 |
+
"step": 447
|
3211 |
+
},
|
3212 |
+
{
|
3213 |
+
"epoch": 0.012453005885991063,
|
3214 |
+
"grad_norm": 0.14832744002342224,
|
3215 |
+
"learning_rate": 5.5062743604570865e-05,
|
3216 |
+
"loss": 0.5683,
|
3217 |
+
"step": 448
|
3218 |
+
},
|
3219 |
+
{
|
3220 |
+
"epoch": 0.012480802774129437,
|
3221 |
+
"grad_norm": 0.1458773910999298,
|
3222 |
+
"learning_rate": 5.298403423837883e-05,
|
3223 |
+
"loss": 0.4833,
|
3224 |
+
"step": 449
|
3225 |
+
},
|
3226 |
+
{
|
3227 |
+
"epoch": 0.012508599662267809,
|
3228 |
+
"grad_norm": 0.19131259620189667,
|
3229 |
+
"learning_rate": 5.094425298933136e-05,
|
3230 |
+
"loss": 0.507,
|
3231 |
+
"step": 450
|
3232 |
+
},
|
3233 |
+
{
|
3234 |
+
"epoch": 0.012508599662267809,
|
3235 |
+
"eval_loss": 0.6939424276351929,
|
3236 |
+
"eval_runtime": 49.9667,
|
3237 |
+
"eval_samples_per_second": 11.548,
|
3238 |
+
"eval_steps_per_second": 5.784,
|
3239 |
+
"step": 450
|
3240 |
}
|
3241 |
],
|
3242 |
"logging_steps": 1,
|
|
|
3265 |
"attributes": {}
|
3266 |
}
|
3267 |
},
|
3268 |
+
"total_flos": 2.879669611266048e+16,
|
3269 |
"train_batch_size": 2,
|
3270 |
"trial_name": null,
|
3271 |
"trial_params": null
|