diff --git a/checkpoint-11187/config.json b/checkpoint-11187/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-11187/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-11187/generation_config.json b/checkpoint-11187/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-11187/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-11187/model-00001-of-00002.safetensors b/checkpoint-11187/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..03ae3febb8db8109bd8debd082c0e1f80d11cf3e --- /dev/null +++ b/checkpoint-11187/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a70e8357cbb3608475754c5d4356a63b69f760776340354f8406c72381c56af +size 4992706480 diff --git a/checkpoint-11187/model-00002-of-00002.safetensors b/checkpoint-11187/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50ad77896ad952fdf5e510976ac24ec645294ea9 --- /dev/null +++ b/checkpoint-11187/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af621993d6d1098dc2bc5fc088640cc33ee07cf43c8901a11360ba66b032480 +size 1180663192 diff --git a/checkpoint-11187/model.safetensors.index.json b/checkpoint-11187/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-11187/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-11187/optimizer.pt b/checkpoint-11187/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..70ae3a0058e3a7ff851a8dbbf8e07187f770e953 --- /dev/null +++ b/checkpoint-11187/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dae8d4b1de57b0280be379c2183fb590adc244ec78ba6d0499fc2cd80e013bd +size 3095074288 diff --git a/checkpoint-11187/preprocessor_config.json b/checkpoint-11187/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-11187/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-11187/rng_state.pth b/checkpoint-11187/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..45194a50bac4922a801c7175fc64524390bf3bdb --- /dev/null +++ b/checkpoint-11187/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c1fe439e2ed06ec288514fea1b232061abe40004525cb256570d161743fcdb +size 14244 diff --git a/checkpoint-11187/scheduler.pt b/checkpoint-11187/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b111eefd49fb642e414efec5aee1b4b5ec6d9631 --- /dev/null +++ b/checkpoint-11187/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddeaf5db1f24a634b0837bc7a1112c08ef16c1a47b9d0e6b9e76f836bec6e49a +size 1064 diff --git a/checkpoint-11187/trainer_state.json b/checkpoint-11187/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..67237fb8422b40218f56619ba0622079cd8bea3a --- /dev/null +++ b/checkpoint-11187/trainer_state.json @@ -0,0 +1,2115 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8992765273311897, + "eval_steps": 500, + "global_step": 11187, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + }, + { + "epoch": 0.5, + "learning_rate": 5.307850179579271e-07, + "loss": 0.1467, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 5.280485719172225e-07, + "loss": 0.2702, + "step": 6272 + }, + { + "epoch": 0.51, + "learning_rate": 5.253121258765179e-07, + "loss": 0.1736, + "step": 6304 + }, + { + "epoch": 0.51, + "learning_rate": 5.225756798358132e-07, + "loss": 0.2281, + "step": 6336 + }, + { + "epoch": 0.51, + "learning_rate": 5.198392337951087e-07, + "loss": 0.2114, + "step": 6368 + }, + { + "epoch": 0.51, + "learning_rate": 5.17102787754404e-07, + "loss": 0.2252, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 5.143663417136994e-07, + "loss": 0.163, + "step": 6432 + }, + { + "epoch": 0.52, + "learning_rate": 5.116298956729946e-07, + "loss": 0.266, + "step": 6464 + }, + { + "epoch": 0.52, + "learning_rate": 5.0889344963229e-07, + "loss": 0.2154, + "step": 6496 + }, + { + "epoch": 0.52, + "learning_rate": 5.061570035915854e-07, + "loss": 0.2003, + "step": 6528 + }, + { + "epoch": 0.53, + "learning_rate": 5.034205575508807e-07, + "loss": 0.1969, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 5.006841115101761e-07, + "loss": 0.2436, + "step": 6592 + }, + { + "epoch": 0.53, + "learning_rate": 4.979476654694714e-07, + "loss": 0.134, + "step": 6624 + }, + { + "epoch": 0.54, + "learning_rate": 4.952112194287669e-07, + "loss": 0.2246, + "step": 6656 + }, + { + "epoch": 0.54, + "learning_rate": 4.924747733880622e-07, + "loss": 0.1873, + "step": 6688 + }, + { + "epoch": 0.54, + "learning_rate": 4.897383273473576e-07, + "loss": 0.1549, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.87001881306653e-07, + "loss": 0.2518, + "step": 6752 + }, + { + "epoch": 0.55, + "learning_rate": 4.842654352659483e-07, + "loss": 0.2676, + "step": 6784 + }, + { + "epoch": 0.55, + "learning_rate": 4.815289892252437e-07, + "loss": 0.1537, + "step": 6816 + }, + { + "epoch": 0.55, + "learning_rate": 4.787925431845391e-07, + "loss": 0.1925, + "step": 6848 + }, + { + "epoch": 0.55, + "learning_rate": 4.760560971438344e-07, + "loss": 0.2157, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.7331965110312977e-07, + "loss": 0.1923, + "step": 6912 + }, + { + "epoch": 0.56, + "learning_rate": 4.7058320506242517e-07, + "loss": 0.1922, + "step": 6944 + }, + { + "epoch": 0.56, + "learning_rate": 4.678467590217205e-07, + "loss": 0.257, + "step": 6976 + }, + { + "epoch": 0.56, + "learning_rate": 4.6511031298101586e-07, + "loss": 0.1744, + "step": 7008 + }, + { + "epoch": 0.57, + "learning_rate": 4.6237386694031126e-07, + "loss": 0.2281, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.596374208996066e-07, + "loss": 0.2135, + "step": 7072 + }, + { + "epoch": 0.57, + "learning_rate": 4.56900974858902e-07, + "loss": 0.1841, + "step": 7104 + }, + { + "epoch": 0.57, + "learning_rate": 4.5416452881819735e-07, + "loss": 0.2652, + "step": 7136 + }, + { + "epoch": 0.58, + "learning_rate": 4.5142808277749275e-07, + "loss": 0.225, + "step": 7168 + }, + { + "epoch": 0.58, + "learning_rate": 4.4869163673678804e-07, + "loss": 0.2377, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4595519069608344e-07, + "loss": 0.2323, + "step": 7232 + }, + { + "epoch": 0.58, + "learning_rate": 4.432187446553788e-07, + "loss": 0.2227, + "step": 7264 + }, + { + "epoch": 0.59, + "learning_rate": 4.404822986146742e-07, + "loss": 0.2968, + "step": 7296 + }, + { + "epoch": 0.59, + "learning_rate": 4.377458525739695e-07, + "loss": 0.2623, + "step": 7328 + }, + { + "epoch": 0.59, + "learning_rate": 4.350094065332649e-07, + "loss": 0.1943, + "step": 7360 + }, + { + "epoch": 0.59, + "learning_rate": 4.322729604925603e-07, + "loss": 0.158, + "step": 7392 + }, + { + "epoch": 0.6, + "learning_rate": 4.2953651445185563e-07, + "loss": 0.2413, + "step": 7424 + }, + { + "epoch": 0.6, + "learning_rate": 4.2680006841115103e-07, + "loss": 0.2413, + "step": 7456 + }, + { + "epoch": 0.6, + "learning_rate": 4.240636223704464e-07, + "loss": 0.1793, + "step": 7488 + }, + { + "epoch": 0.6, + "learning_rate": 4.213271763297417e-07, + "loss": 0.2505, + "step": 7520 + }, + { + "epoch": 0.61, + "learning_rate": 4.1859073028903707e-07, + "loss": 0.2078, + "step": 7552 + }, + { + "epoch": 0.61, + "learning_rate": 4.1585428424833247e-07, + "loss": 0.2603, + "step": 7584 + }, + { + "epoch": 0.61, + "learning_rate": 4.131178382076278e-07, + "loss": 0.1426, + "step": 7616 + }, + { + "epoch": 0.61, + "learning_rate": 4.103813921669232e-07, + "loss": 0.1823, + "step": 7648 + }, + { + "epoch": 0.62, + "learning_rate": 4.0764494612621856e-07, + "loss": 0.2215, + "step": 7680 + }, + { + "epoch": 0.62, + "learning_rate": 4.049085000855139e-07, + "loss": 0.2956, + "step": 7712 + }, + { + "epoch": 0.62, + "learning_rate": 4.021720540448093e-07, + "loss": 0.1766, + "step": 7744 + }, + { + "epoch": 0.63, + "learning_rate": 3.9943560800410465e-07, + "loss": 0.2124, + "step": 7776 + }, + { + "epoch": 0.63, + "learning_rate": 3.9669916196340005e-07, + "loss": 0.2123, + "step": 7808 + }, + { + "epoch": 0.63, + "learning_rate": 3.9396271592269535e-07, + "loss": 0.2172, + "step": 7840 + }, + { + "epoch": 0.63, + "learning_rate": 3.9122626988199075e-07, + "loss": 0.1737, + "step": 7872 + }, + { + "epoch": 0.64, + "learning_rate": 3.884898238412861e-07, + "loss": 0.2504, + "step": 7904 + }, + { + "epoch": 0.64, + "learning_rate": 3.857533778005815e-07, + "loss": 0.2242, + "step": 7936 + }, + { + "epoch": 0.64, + "learning_rate": 3.8301693175987684e-07, + "loss": 0.205, + "step": 7968 + }, + { + "epoch": 0.64, + "learning_rate": 3.802804857191722e-07, + "loss": 0.2286, + "step": 8000 + }, + { + "epoch": 0.65, + "learning_rate": 3.775440396784676e-07, + "loss": 0.1848, + "step": 8032 + }, + { + "epoch": 0.65, + "learning_rate": 3.7480759363776293e-07, + "loss": 0.2065, + "step": 8064 + }, + { + "epoch": 0.65, + "learning_rate": 3.7207114759705833e-07, + "loss": 0.1688, + "step": 8096 + }, + { + "epoch": 0.65, + "learning_rate": 3.693347015563537e-07, + "loss": 0.2489, + "step": 8128 + }, + { + "epoch": 0.66, + "learning_rate": 3.66598255515649e-07, + "loss": 0.2309, + "step": 8160 + }, + { + "epoch": 0.66, + "learning_rate": 3.6386180947494437e-07, + "loss": 0.1236, + "step": 8192 + }, + { + "epoch": 0.66, + "learning_rate": 3.6112536343423977e-07, + "loss": 0.2242, + "step": 8224 + }, + { + "epoch": 0.66, + "learning_rate": 3.583889173935351e-07, + "loss": 0.221, + "step": 8256 + }, + { + "epoch": 0.67, + "learning_rate": 3.556524713528305e-07, + "loss": 0.1663, + "step": 8288 + }, + { + "epoch": 0.67, + "learning_rate": 3.5291602531212586e-07, + "loss": 0.2063, + "step": 8320 + }, + { + "epoch": 0.67, + "learning_rate": 3.501795792714212e-07, + "loss": 0.2651, + "step": 8352 + }, + { + "epoch": 0.67, + "learning_rate": 3.474431332307166e-07, + "loss": 0.2451, + "step": 8384 + }, + { + "epoch": 0.68, + "learning_rate": 3.4470668719001196e-07, + "loss": 0.1645, + "step": 8416 + }, + { + "epoch": 0.68, + "learning_rate": 3.4197024114930736e-07, + "loss": 0.1446, + "step": 8448 + }, + { + "epoch": 0.68, + "learning_rate": 3.3923379510860265e-07, + "loss": 0.2006, + "step": 8480 + }, + { + "epoch": 0.68, + "learning_rate": 3.3649734906789805e-07, + "loss": 0.1702, + "step": 8512 + }, + { + "epoch": 0.69, + "learning_rate": 3.337609030271934e-07, + "loss": 0.2685, + "step": 8544 + }, + { + "epoch": 0.69, + "learning_rate": 3.310244569864888e-07, + "loss": 0.1944, + "step": 8576 + }, + { + "epoch": 0.69, + "learning_rate": 3.2828801094578414e-07, + "loss": 0.1461, + "step": 8608 + }, + { + "epoch": 0.69, + "learning_rate": 3.255515649050795e-07, + "loss": 0.2539, + "step": 8640 + }, + { + "epoch": 0.7, + "learning_rate": 3.228151188643749e-07, + "loss": 0.1783, + "step": 8672 + }, + { + "epoch": 0.7, + "learning_rate": 3.2007867282367024e-07, + "loss": 0.2084, + "step": 8704 + }, + { + "epoch": 0.7, + "learning_rate": 3.1734222678296563e-07, + "loss": 0.1596, + "step": 8736 + }, + { + "epoch": 0.7, + "learning_rate": 3.14605780742261e-07, + "loss": 0.1677, + "step": 8768 + }, + { + "epoch": 0.71, + "learning_rate": 3.1186933470155633e-07, + "loss": 0.1636, + "step": 8800 + }, + { + "epoch": 0.71, + "learning_rate": 3.091328886608517e-07, + "loss": 0.2336, + "step": 8832 + }, + { + "epoch": 0.71, + "learning_rate": 3.063964426201471e-07, + "loss": 0.1853, + "step": 8864 + }, + { + "epoch": 0.72, + "learning_rate": 3.036599965794424e-07, + "loss": 0.2304, + "step": 8896 + }, + { + "epoch": 0.72, + "learning_rate": 3.009235505387378e-07, + "loss": 0.2037, + "step": 8928 + }, + { + "epoch": 0.72, + "learning_rate": 2.9818710449803317e-07, + "loss": 0.2159, + "step": 8960 + }, + { + "epoch": 0.72, + "learning_rate": 2.954506584573285e-07, + "loss": 0.2512, + "step": 8992 + }, + { + "epoch": 0.73, + "learning_rate": 2.927142124166239e-07, + "loss": 0.2011, + "step": 9024 + }, + { + "epoch": 0.73, + "learning_rate": 2.8997776637591926e-07, + "loss": 0.1677, + "step": 9056 + }, + { + "epoch": 0.73, + "learning_rate": 2.8724132033521466e-07, + "loss": 0.192, + "step": 9088 + }, + { + "epoch": 0.73, + "learning_rate": 2.8450487429450995e-07, + "loss": 0.1615, + "step": 9120 + }, + { + "epoch": 0.74, + "learning_rate": 2.8176842825380535e-07, + "loss": 0.1646, + "step": 9152 + }, + { + "epoch": 0.74, + "learning_rate": 2.790319822131007e-07, + "loss": 0.2301, + "step": 9184 + }, + { + "epoch": 0.74, + "learning_rate": 2.762955361723961e-07, + "loss": 0.1663, + "step": 9216 + }, + { + "epoch": 0.74, + "learning_rate": 2.7355909013169145e-07, + "loss": 0.251, + "step": 9248 + }, + { + "epoch": 0.75, + "learning_rate": 2.708226440909868e-07, + "loss": 0.2301, + "step": 9280 + }, + { + "epoch": 0.75, + "learning_rate": 2.680861980502822e-07, + "loss": 0.2222, + "step": 9312 + }, + { + "epoch": 0.75, + "learning_rate": 2.6534975200957754e-07, + "loss": 0.1784, + "step": 9344 + }, + { + "epoch": 0.75, + "learning_rate": 2.6261330596887294e-07, + "loss": 0.1714, + "step": 9376 + }, + { + "epoch": 0.76, + "learning_rate": 2.598768599281683e-07, + "loss": 0.2258, + "step": 9408 + }, + { + "epoch": 0.76, + "learning_rate": 2.571404138874637e-07, + "loss": 0.1907, + "step": 9440 + }, + { + "epoch": 0.76, + "learning_rate": 2.54403967846759e-07, + "loss": 0.1938, + "step": 9472 + }, + { + "epoch": 0.76, + "learning_rate": 2.516675218060544e-07, + "loss": 0.1831, + "step": 9504 + }, + { + "epoch": 0.77, + "learning_rate": 2.489310757653497e-07, + "loss": 0.1833, + "step": 9536 + }, + { + "epoch": 0.77, + "learning_rate": 2.461946297246451e-07, + "loss": 0.2551, + "step": 9568 + }, + { + "epoch": 0.77, + "learning_rate": 2.4345818368394047e-07, + "loss": 0.1553, + "step": 9600 + }, + { + "epoch": 0.77, + "learning_rate": 2.407217376432358e-07, + "loss": 0.1785, + "step": 9632 + }, + { + "epoch": 0.78, + "learning_rate": 2.379852916025312e-07, + "loss": 0.2903, + "step": 9664 + }, + { + "epoch": 0.78, + "learning_rate": 2.3524884556182656e-07, + "loss": 0.2093, + "step": 9696 + }, + { + "epoch": 0.78, + "learning_rate": 2.325123995211219e-07, + "loss": 0.2865, + "step": 9728 + }, + { + "epoch": 0.78, + "learning_rate": 2.2977595348041728e-07, + "loss": 0.2033, + "step": 9760 + }, + { + "epoch": 0.79, + "learning_rate": 2.2703950743971268e-07, + "loss": 0.1929, + "step": 9792 + }, + { + "epoch": 0.79, + "learning_rate": 2.2430306139900803e-07, + "loss": 0.1968, + "step": 9824 + }, + { + "epoch": 0.79, + "learning_rate": 2.215666153583034e-07, + "loss": 0.1985, + "step": 9856 + }, + { + "epoch": 0.79, + "learning_rate": 2.1883016931759877e-07, + "loss": 0.1509, + "step": 9888 + }, + { + "epoch": 0.8, + "learning_rate": 2.1609372327689412e-07, + "loss": 0.1988, + "step": 9920 + }, + { + "epoch": 0.8, + "learning_rate": 2.133572772361895e-07, + "loss": 0.2059, + "step": 9952 + }, + { + "epoch": 0.8, + "learning_rate": 2.1062083119548484e-07, + "loss": 0.1701, + "step": 9984 + }, + { + "epoch": 0.81, + "learning_rate": 2.0788438515478021e-07, + "loss": 0.1524, + "step": 10016 + }, + { + "epoch": 0.81, + "learning_rate": 2.051479391140756e-07, + "loss": 0.2236, + "step": 10048 + }, + { + "epoch": 0.81, + "learning_rate": 2.0241149307337093e-07, + "loss": 0.189, + "step": 10080 + }, + { + "epoch": 0.81, + "learning_rate": 1.9967504703266633e-07, + "loss": 0.1464, + "step": 10112 + }, + { + "epoch": 0.82, + "learning_rate": 1.9693860099196168e-07, + "loss": 0.2245, + "step": 10144 + }, + { + "epoch": 0.82, + "learning_rate": 1.9420215495125705e-07, + "loss": 0.2619, + "step": 10176 + }, + { + "epoch": 0.82, + "learning_rate": 1.9146570891055243e-07, + "loss": 0.2331, + "step": 10208 + }, + { + "epoch": 0.82, + "learning_rate": 1.8872926286984777e-07, + "loss": 0.1925, + "step": 10240 + }, + { + "epoch": 0.83, + "learning_rate": 1.8599281682914315e-07, + "loss": 0.2707, + "step": 10272 + }, + { + "epoch": 0.83, + "learning_rate": 1.832563707884385e-07, + "loss": 0.1792, + "step": 10304 + }, + { + "epoch": 0.83, + "learning_rate": 1.8051992474773387e-07, + "loss": 0.1663, + "step": 10336 + }, + { + "epoch": 0.83, + "learning_rate": 1.7778347870702924e-07, + "loss": 0.1783, + "step": 10368 + }, + { + "epoch": 0.84, + "learning_rate": 1.7504703266632459e-07, + "loss": 0.2028, + "step": 10400 + }, + { + "epoch": 0.84, + "learning_rate": 1.7231058662561998e-07, + "loss": 0.2036, + "step": 10432 + }, + { + "epoch": 0.84, + "learning_rate": 1.6957414058491533e-07, + "loss": 0.1832, + "step": 10464 + }, + { + "epoch": 0.84, + "learning_rate": 1.668376945442107e-07, + "loss": 0.1795, + "step": 10496 + }, + { + "epoch": 0.85, + "learning_rate": 1.6410124850350608e-07, + "loss": 0.1507, + "step": 10528 + }, + { + "epoch": 0.85, + "learning_rate": 1.6136480246280142e-07, + "loss": 0.2078, + "step": 10560 + }, + { + "epoch": 0.85, + "learning_rate": 1.586283564220968e-07, + "loss": 0.2228, + "step": 10592 + }, + { + "epoch": 0.85, + "learning_rate": 1.5589191038139214e-07, + "loss": 0.2, + "step": 10624 + }, + { + "epoch": 0.86, + "learning_rate": 1.5315546434068752e-07, + "loss": 0.1615, + "step": 10656 + }, + { + "epoch": 0.86, + "learning_rate": 1.504190182999829e-07, + "loss": 0.1746, + "step": 10688 + }, + { + "epoch": 0.86, + "learning_rate": 1.4768257225927824e-07, + "loss": 0.1857, + "step": 10720 + }, + { + "epoch": 0.86, + "learning_rate": 1.4494612621857364e-07, + "loss": 0.2143, + "step": 10752 + }, + { + "epoch": 0.87, + "learning_rate": 1.4220968017786898e-07, + "loss": 0.1919, + "step": 10784 + }, + { + "epoch": 0.87, + "learning_rate": 1.3947323413716436e-07, + "loss": 0.1885, + "step": 10816 + }, + { + "epoch": 0.87, + "learning_rate": 1.3673678809645973e-07, + "loss": 0.1617, + "step": 10848 + }, + { + "epoch": 0.87, + "learning_rate": 1.3400034205575508e-07, + "loss": 0.1512, + "step": 10880 + }, + { + "epoch": 0.88, + "learning_rate": 1.3126389601505045e-07, + "loss": 0.1666, + "step": 10912 + }, + { + "epoch": 0.88, + "learning_rate": 1.2852744997434582e-07, + "loss": 0.1519, + "step": 10944 + }, + { + "epoch": 0.88, + "learning_rate": 1.2579100393364117e-07, + "loss": 0.1474, + "step": 10976 + }, + { + "epoch": 0.88, + "learning_rate": 1.2305455789293654e-07, + "loss": 0.3074, + "step": 11008 + }, + { + "epoch": 0.89, + "learning_rate": 1.2031811185223191e-07, + "loss": 0.1517, + "step": 11040 + }, + { + "epoch": 0.89, + "learning_rate": 1.1758166581152727e-07, + "loss": 0.3134, + "step": 11072 + }, + { + "epoch": 0.89, + "learning_rate": 1.1484521977082263e-07, + "loss": 0.1734, + "step": 11104 + }, + { + "epoch": 0.9, + "learning_rate": 1.12108773730118e-07, + "loss": 0.24, + "step": 11136 + }, + { + "epoch": 0.9, + "learning_rate": 1.0937232768941338e-07, + "loss": 0.2515, + "step": 11168 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 4.75038284193792e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-11187/training_args.bin b/checkpoint-11187/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-11187/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-1243/config.json b/checkpoint-1243/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-1243/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-1243/generation_config.json b/checkpoint-1243/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-1243/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-1243/model-00001-of-00002.safetensors b/checkpoint-1243/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37bc8f17518c7af71c242baa58b178e2f2c3faac --- /dev/null +++ b/checkpoint-1243/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53dd4fe1b7540e808539f0bb9c5b944929f00c7f7900095a0db225b8d088bfdd +size 4992706480 diff --git a/checkpoint-1243/model-00002-of-00002.safetensors b/checkpoint-1243/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0727d4705395f0ba33f688bfc09daef5174677d --- /dev/null +++ b/checkpoint-1243/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3517c5ea86516a1c757abf66466a305606ea6f39c196daf6daf7818556aad9c3 +size 1180663192 diff --git a/checkpoint-1243/model.safetensors.index.json b/checkpoint-1243/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-1243/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-1243/optimizer.pt b/checkpoint-1243/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b98fac7caedb3cc3dfc016c71fcd53b4ef459375 --- /dev/null +++ b/checkpoint-1243/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e1aa5b65d67fc0981e7ad567dfb4d23be63817ba29537dca76aca65af892f2 +size 3095074288 diff --git a/checkpoint-1243/preprocessor_config.json b/checkpoint-1243/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-1243/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-1243/rng_state.pth b/checkpoint-1243/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e4d4ac34008406763f842ea1e969090fbeaaee8 --- /dev/null +++ b/checkpoint-1243/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3903d2c24f5d909c3588231ccdda3e1ce965628ad1d6bbc00bcf6420dbfb5a6 +size 14244 diff --git a/checkpoint-1243/scheduler.pt b/checkpoint-1243/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccf6bbe54562e47eac09798fafbd769c2990a1e7 --- /dev/null +++ b/checkpoint-1243/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca00b3236d817653aaa251d7bfffcc2d627276188e63cc50758b7f5fdc1187d1 +size 1064 diff --git a/checkpoint-1243/trainer_state.json b/checkpoint-1243/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f81cbfb0f1fbbfd58ce68f71abe92777490cc4bd --- /dev/null +++ b/checkpoint-1243/trainer_state.json @@ -0,0 +1,249 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.09991961414790997, + "eval_steps": 500, + "global_step": 1243, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 5.2782031577088e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1243/training_args.bin b/checkpoint-1243/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-1243/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-12430/config.json b/checkpoint-12430/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-12430/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-12430/generation_config.json b/checkpoint-12430/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-12430/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-12430/model-00001-of-00002.safetensors b/checkpoint-12430/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d4a1e7f46ec18bf8750c616759e8fd8434ad7b0 --- /dev/null +++ b/checkpoint-12430/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abcb38ffd84cb0cdd12bdbdcd2e051ed89beb5816bfa127790e24dbbc4760bb9 +size 4992706480 diff --git a/checkpoint-12430/model-00002-of-00002.safetensors b/checkpoint-12430/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a0ec91d0f6446f190b995dbe8f8f2b67ad107a3 --- /dev/null +++ b/checkpoint-12430/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aed090fddcb2645613adf66061b9db912fca6dba963bef57f78daba1b931f1a +size 1180663192 diff --git a/checkpoint-12430/model.safetensors.index.json b/checkpoint-12430/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-12430/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-12430/optimizer.pt b/checkpoint-12430/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dcca7ca337409772ff7e6f680a1aa96f2ebfb96 --- /dev/null +++ b/checkpoint-12430/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e37796041013f1d55d29eb17136b55bdca42f392762bf7a0c91ff990105736d +size 3095074288 diff --git a/checkpoint-12430/preprocessor_config.json b/checkpoint-12430/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-12430/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-12430/rng_state.pth b/checkpoint-12430/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..df9bd1b1a97156b7f7cce203bd14137ead0d9e8a --- /dev/null +++ b/checkpoint-12430/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068026cd5cf8e98946c482d433bf9914b7464f4949e9495be438b56bdc0b35f0 +size 14244 diff --git a/checkpoint-12430/scheduler.pt b/checkpoint-12430/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb4a36b01954b58879c2dc32fe448250e6089961 --- /dev/null +++ b/checkpoint-12430/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a93ce86da525a84bdadccb70efa15322ea47e5a4e5d6b55309383d7e3340d1d +size 1064 diff --git a/checkpoint-12430/trainer_state.json b/checkpoint-12430/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..14adcc8fa3bc48b1c495081f8eb798fceda70b10 --- /dev/null +++ b/checkpoint-12430/trainer_state.json @@ -0,0 +1,2349 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9991961414790996, + "eval_steps": 500, + "global_step": 12430, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + }, + { + "epoch": 0.5, + "learning_rate": 5.307850179579271e-07, + "loss": 0.1467, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 5.280485719172225e-07, + "loss": 0.2702, + "step": 6272 + }, + { + "epoch": 0.51, + "learning_rate": 5.253121258765179e-07, + "loss": 0.1736, + "step": 6304 + }, + { + "epoch": 0.51, + "learning_rate": 5.225756798358132e-07, + "loss": 0.2281, + "step": 6336 + }, + { + "epoch": 0.51, + "learning_rate": 5.198392337951087e-07, + "loss": 0.2114, + "step": 6368 + }, + { + "epoch": 0.51, + "learning_rate": 5.17102787754404e-07, + "loss": 0.2252, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 5.143663417136994e-07, + "loss": 0.163, + "step": 6432 + }, + { + "epoch": 0.52, + "learning_rate": 5.116298956729946e-07, + "loss": 0.266, + "step": 6464 + }, + { + "epoch": 0.52, + "learning_rate": 5.0889344963229e-07, + "loss": 0.2154, + "step": 6496 + }, + { + "epoch": 0.52, + "learning_rate": 5.061570035915854e-07, + "loss": 0.2003, + "step": 6528 + }, + { + "epoch": 0.53, + "learning_rate": 5.034205575508807e-07, + "loss": 0.1969, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 5.006841115101761e-07, + "loss": 0.2436, + "step": 6592 + }, + { + "epoch": 0.53, + "learning_rate": 4.979476654694714e-07, + "loss": 0.134, + "step": 6624 + }, + { + "epoch": 0.54, + "learning_rate": 4.952112194287669e-07, + "loss": 0.2246, + "step": 6656 + }, + { + "epoch": 0.54, + "learning_rate": 4.924747733880622e-07, + "loss": 0.1873, + "step": 6688 + }, + { + "epoch": 0.54, + "learning_rate": 4.897383273473576e-07, + "loss": 0.1549, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.87001881306653e-07, + "loss": 0.2518, + "step": 6752 + }, + { + "epoch": 0.55, + "learning_rate": 4.842654352659483e-07, + "loss": 0.2676, + "step": 6784 + }, + { + "epoch": 0.55, + "learning_rate": 4.815289892252437e-07, + "loss": 0.1537, + "step": 6816 + }, + { + "epoch": 0.55, + "learning_rate": 4.787925431845391e-07, + "loss": 0.1925, + "step": 6848 + }, + { + "epoch": 0.55, + "learning_rate": 4.760560971438344e-07, + "loss": 0.2157, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.7331965110312977e-07, + "loss": 0.1923, + "step": 6912 + }, + { + "epoch": 0.56, + "learning_rate": 4.7058320506242517e-07, + "loss": 0.1922, + "step": 6944 + }, + { + "epoch": 0.56, + "learning_rate": 4.678467590217205e-07, + "loss": 0.257, + "step": 6976 + }, + { + "epoch": 0.56, + "learning_rate": 4.6511031298101586e-07, + "loss": 0.1744, + "step": 7008 + }, + { + "epoch": 0.57, + "learning_rate": 4.6237386694031126e-07, + "loss": 0.2281, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.596374208996066e-07, + "loss": 0.2135, + "step": 7072 + }, + { + "epoch": 0.57, + "learning_rate": 4.56900974858902e-07, + "loss": 0.1841, + "step": 7104 + }, + { + "epoch": 0.57, + "learning_rate": 4.5416452881819735e-07, + "loss": 0.2652, + "step": 7136 + }, + { + "epoch": 0.58, + "learning_rate": 4.5142808277749275e-07, + "loss": 0.225, + "step": 7168 + }, + { + "epoch": 0.58, + "learning_rate": 4.4869163673678804e-07, + "loss": 0.2377, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4595519069608344e-07, + "loss": 0.2323, + "step": 7232 + }, + { + "epoch": 0.58, + "learning_rate": 4.432187446553788e-07, + "loss": 0.2227, + "step": 7264 + }, + { + "epoch": 0.59, + "learning_rate": 4.404822986146742e-07, + "loss": 0.2968, + "step": 7296 + }, + { + "epoch": 0.59, + "learning_rate": 4.377458525739695e-07, + "loss": 0.2623, + "step": 7328 + }, + { + "epoch": 0.59, + "learning_rate": 4.350094065332649e-07, + "loss": 0.1943, + "step": 7360 + }, + { + "epoch": 0.59, + "learning_rate": 4.322729604925603e-07, + "loss": 0.158, + "step": 7392 + }, + { + "epoch": 0.6, + "learning_rate": 4.2953651445185563e-07, + "loss": 0.2413, + "step": 7424 + }, + { + "epoch": 0.6, + "learning_rate": 4.2680006841115103e-07, + "loss": 0.2413, + "step": 7456 + }, + { + "epoch": 0.6, + "learning_rate": 4.240636223704464e-07, + "loss": 0.1793, + "step": 7488 + }, + { + "epoch": 0.6, + "learning_rate": 4.213271763297417e-07, + "loss": 0.2505, + "step": 7520 + }, + { + "epoch": 0.61, + "learning_rate": 4.1859073028903707e-07, + "loss": 0.2078, + "step": 7552 + }, + { + "epoch": 0.61, + "learning_rate": 4.1585428424833247e-07, + "loss": 0.2603, + "step": 7584 + }, + { + "epoch": 0.61, + "learning_rate": 4.131178382076278e-07, + "loss": 0.1426, + "step": 7616 + }, + { + "epoch": 0.61, + "learning_rate": 4.103813921669232e-07, + "loss": 0.1823, + "step": 7648 + }, + { + "epoch": 0.62, + "learning_rate": 4.0764494612621856e-07, + "loss": 0.2215, + "step": 7680 + }, + { + "epoch": 0.62, + "learning_rate": 4.049085000855139e-07, + "loss": 0.2956, + "step": 7712 + }, + { + "epoch": 0.62, + "learning_rate": 4.021720540448093e-07, + "loss": 0.1766, + "step": 7744 + }, + { + "epoch": 0.63, + "learning_rate": 3.9943560800410465e-07, + "loss": 0.2124, + "step": 7776 + }, + { + "epoch": 0.63, + "learning_rate": 3.9669916196340005e-07, + "loss": 0.2123, + "step": 7808 + }, + { + "epoch": 0.63, + "learning_rate": 3.9396271592269535e-07, + "loss": 0.2172, + "step": 7840 + }, + { + "epoch": 0.63, + "learning_rate": 3.9122626988199075e-07, + "loss": 0.1737, + "step": 7872 + }, + { + "epoch": 0.64, + "learning_rate": 3.884898238412861e-07, + "loss": 0.2504, + "step": 7904 + }, + { + "epoch": 0.64, + "learning_rate": 3.857533778005815e-07, + "loss": 0.2242, + "step": 7936 + }, + { + "epoch": 0.64, + "learning_rate": 3.8301693175987684e-07, + "loss": 0.205, + "step": 7968 + }, + { + "epoch": 0.64, + "learning_rate": 3.802804857191722e-07, + "loss": 0.2286, + "step": 8000 + }, + { + "epoch": 0.65, + "learning_rate": 3.775440396784676e-07, + "loss": 0.1848, + "step": 8032 + }, + { + "epoch": 0.65, + "learning_rate": 3.7480759363776293e-07, + "loss": 0.2065, + "step": 8064 + }, + { + "epoch": 0.65, + "learning_rate": 3.7207114759705833e-07, + "loss": 0.1688, + "step": 8096 + }, + { + "epoch": 0.65, + "learning_rate": 3.693347015563537e-07, + "loss": 0.2489, + "step": 8128 + }, + { + "epoch": 0.66, + "learning_rate": 3.66598255515649e-07, + "loss": 0.2309, + "step": 8160 + }, + { + "epoch": 0.66, + "learning_rate": 3.6386180947494437e-07, + "loss": 0.1236, + "step": 8192 + }, + { + "epoch": 0.66, + "learning_rate": 3.6112536343423977e-07, + "loss": 0.2242, + "step": 8224 + }, + { + "epoch": 0.66, + "learning_rate": 3.583889173935351e-07, + "loss": 0.221, + "step": 8256 + }, + { + "epoch": 0.67, + "learning_rate": 3.556524713528305e-07, + "loss": 0.1663, + "step": 8288 + }, + { + "epoch": 0.67, + "learning_rate": 3.5291602531212586e-07, + "loss": 0.2063, + "step": 8320 + }, + { + "epoch": 0.67, + "learning_rate": 3.501795792714212e-07, + "loss": 0.2651, + "step": 8352 + }, + { + "epoch": 0.67, + "learning_rate": 3.474431332307166e-07, + "loss": 0.2451, + "step": 8384 + }, + { + "epoch": 0.68, + "learning_rate": 3.4470668719001196e-07, + "loss": 0.1645, + "step": 8416 + }, + { + "epoch": 0.68, + "learning_rate": 3.4197024114930736e-07, + "loss": 0.1446, + "step": 8448 + }, + { + "epoch": 0.68, + "learning_rate": 3.3923379510860265e-07, + "loss": 0.2006, + "step": 8480 + }, + { + "epoch": 0.68, + "learning_rate": 3.3649734906789805e-07, + "loss": 0.1702, + "step": 8512 + }, + { + "epoch": 0.69, + "learning_rate": 3.337609030271934e-07, + "loss": 0.2685, + "step": 8544 + }, + { + "epoch": 0.69, + "learning_rate": 3.310244569864888e-07, + "loss": 0.1944, + "step": 8576 + }, + { + "epoch": 0.69, + "learning_rate": 3.2828801094578414e-07, + "loss": 0.1461, + "step": 8608 + }, + { + "epoch": 0.69, + "learning_rate": 3.255515649050795e-07, + "loss": 0.2539, + "step": 8640 + }, + { + "epoch": 0.7, + "learning_rate": 3.228151188643749e-07, + "loss": 0.1783, + "step": 8672 + }, + { + "epoch": 0.7, + "learning_rate": 3.2007867282367024e-07, + "loss": 0.2084, + "step": 8704 + }, + { + "epoch": 0.7, + "learning_rate": 3.1734222678296563e-07, + "loss": 0.1596, + "step": 8736 + }, + { + "epoch": 0.7, + "learning_rate": 3.14605780742261e-07, + "loss": 0.1677, + "step": 8768 + }, + { + "epoch": 0.71, + "learning_rate": 3.1186933470155633e-07, + "loss": 0.1636, + "step": 8800 + }, + { + "epoch": 0.71, + "learning_rate": 3.091328886608517e-07, + "loss": 0.2336, + "step": 8832 + }, + { + "epoch": 0.71, + "learning_rate": 3.063964426201471e-07, + "loss": 0.1853, + "step": 8864 + }, + { + "epoch": 0.72, + "learning_rate": 3.036599965794424e-07, + "loss": 0.2304, + "step": 8896 + }, + { + "epoch": 0.72, + "learning_rate": 3.009235505387378e-07, + "loss": 0.2037, + "step": 8928 + }, + { + "epoch": 0.72, + "learning_rate": 2.9818710449803317e-07, + "loss": 0.2159, + "step": 8960 + }, + { + "epoch": 0.72, + "learning_rate": 2.954506584573285e-07, + "loss": 0.2512, + "step": 8992 + }, + { + "epoch": 0.73, + "learning_rate": 2.927142124166239e-07, + "loss": 0.2011, + "step": 9024 + }, + { + "epoch": 0.73, + "learning_rate": 2.8997776637591926e-07, + "loss": 0.1677, + "step": 9056 + }, + { + "epoch": 0.73, + "learning_rate": 2.8724132033521466e-07, + "loss": 0.192, + "step": 9088 + }, + { + "epoch": 0.73, + "learning_rate": 2.8450487429450995e-07, + "loss": 0.1615, + "step": 9120 + }, + { + "epoch": 0.74, + "learning_rate": 2.8176842825380535e-07, + "loss": 0.1646, + "step": 9152 + }, + { + "epoch": 0.74, + "learning_rate": 2.790319822131007e-07, + "loss": 0.2301, + "step": 9184 + }, + { + "epoch": 0.74, + "learning_rate": 2.762955361723961e-07, + "loss": 0.1663, + "step": 9216 + }, + { + "epoch": 0.74, + "learning_rate": 2.7355909013169145e-07, + "loss": 0.251, + "step": 9248 + }, + { + "epoch": 0.75, + "learning_rate": 2.708226440909868e-07, + "loss": 0.2301, + "step": 9280 + }, + { + "epoch": 0.75, + "learning_rate": 2.680861980502822e-07, + "loss": 0.2222, + "step": 9312 + }, + { + "epoch": 0.75, + "learning_rate": 2.6534975200957754e-07, + "loss": 0.1784, + "step": 9344 + }, + { + "epoch": 0.75, + "learning_rate": 2.6261330596887294e-07, + "loss": 0.1714, + "step": 9376 + }, + { + "epoch": 0.76, + "learning_rate": 2.598768599281683e-07, + "loss": 0.2258, + "step": 9408 + }, + { + "epoch": 0.76, + "learning_rate": 2.571404138874637e-07, + "loss": 0.1907, + "step": 9440 + }, + { + "epoch": 0.76, + "learning_rate": 2.54403967846759e-07, + "loss": 0.1938, + "step": 9472 + }, + { + "epoch": 0.76, + "learning_rate": 2.516675218060544e-07, + "loss": 0.1831, + "step": 9504 + }, + { + "epoch": 0.77, + "learning_rate": 2.489310757653497e-07, + "loss": 0.1833, + "step": 9536 + }, + { + "epoch": 0.77, + "learning_rate": 2.461946297246451e-07, + "loss": 0.2551, + "step": 9568 + }, + { + "epoch": 0.77, + "learning_rate": 2.4345818368394047e-07, + "loss": 0.1553, + "step": 9600 + }, + { + "epoch": 0.77, + "learning_rate": 2.407217376432358e-07, + "loss": 0.1785, + "step": 9632 + }, + { + "epoch": 0.78, + "learning_rate": 2.379852916025312e-07, + "loss": 0.2903, + "step": 9664 + }, + { + "epoch": 0.78, + "learning_rate": 2.3524884556182656e-07, + "loss": 0.2093, + "step": 9696 + }, + { + "epoch": 0.78, + "learning_rate": 2.325123995211219e-07, + "loss": 0.2865, + "step": 9728 + }, + { + "epoch": 0.78, + "learning_rate": 2.2977595348041728e-07, + "loss": 0.2033, + "step": 9760 + }, + { + "epoch": 0.79, + "learning_rate": 2.2703950743971268e-07, + "loss": 0.1929, + "step": 9792 + }, + { + "epoch": 0.79, + "learning_rate": 2.2430306139900803e-07, + "loss": 0.1968, + "step": 9824 + }, + { + "epoch": 0.79, + "learning_rate": 2.215666153583034e-07, + "loss": 0.1985, + "step": 9856 + }, + { + "epoch": 0.79, + "learning_rate": 2.1883016931759877e-07, + "loss": 0.1509, + "step": 9888 + }, + { + "epoch": 0.8, + "learning_rate": 2.1609372327689412e-07, + "loss": 0.1988, + "step": 9920 + }, + { + "epoch": 0.8, + "learning_rate": 2.133572772361895e-07, + "loss": 0.2059, + "step": 9952 + }, + { + "epoch": 0.8, + "learning_rate": 2.1062083119548484e-07, + "loss": 0.1701, + "step": 9984 + }, + { + "epoch": 0.81, + "learning_rate": 2.0788438515478021e-07, + "loss": 0.1524, + "step": 10016 + }, + { + "epoch": 0.81, + "learning_rate": 2.051479391140756e-07, + "loss": 0.2236, + "step": 10048 + }, + { + "epoch": 0.81, + "learning_rate": 2.0241149307337093e-07, + "loss": 0.189, + "step": 10080 + }, + { + "epoch": 0.81, + "learning_rate": 1.9967504703266633e-07, + "loss": 0.1464, + "step": 10112 + }, + { + "epoch": 0.82, + "learning_rate": 1.9693860099196168e-07, + "loss": 0.2245, + "step": 10144 + }, + { + "epoch": 0.82, + "learning_rate": 1.9420215495125705e-07, + "loss": 0.2619, + "step": 10176 + }, + { + "epoch": 0.82, + "learning_rate": 1.9146570891055243e-07, + "loss": 0.2331, + "step": 10208 + }, + { + "epoch": 0.82, + "learning_rate": 1.8872926286984777e-07, + "loss": 0.1925, + "step": 10240 + }, + { + "epoch": 0.83, + "learning_rate": 1.8599281682914315e-07, + "loss": 0.2707, + "step": 10272 + }, + { + "epoch": 0.83, + "learning_rate": 1.832563707884385e-07, + "loss": 0.1792, + "step": 10304 + }, + { + "epoch": 0.83, + "learning_rate": 1.8051992474773387e-07, + "loss": 0.1663, + "step": 10336 + }, + { + "epoch": 0.83, + "learning_rate": 1.7778347870702924e-07, + "loss": 0.1783, + "step": 10368 + }, + { + "epoch": 0.84, + "learning_rate": 1.7504703266632459e-07, + "loss": 0.2028, + "step": 10400 + }, + { + "epoch": 0.84, + "learning_rate": 1.7231058662561998e-07, + "loss": 0.2036, + "step": 10432 + }, + { + "epoch": 0.84, + "learning_rate": 1.6957414058491533e-07, + "loss": 0.1832, + "step": 10464 + }, + { + "epoch": 0.84, + "learning_rate": 1.668376945442107e-07, + "loss": 0.1795, + "step": 10496 + }, + { + "epoch": 0.85, + "learning_rate": 1.6410124850350608e-07, + "loss": 0.1507, + "step": 10528 + }, + { + "epoch": 0.85, + "learning_rate": 1.6136480246280142e-07, + "loss": 0.2078, + "step": 10560 + }, + { + "epoch": 0.85, + "learning_rate": 1.586283564220968e-07, + "loss": 0.2228, + "step": 10592 + }, + { + "epoch": 0.85, + "learning_rate": 1.5589191038139214e-07, + "loss": 0.2, + "step": 10624 + }, + { + "epoch": 0.86, + "learning_rate": 1.5315546434068752e-07, + "loss": 0.1615, + "step": 10656 + }, + { + "epoch": 0.86, + "learning_rate": 1.504190182999829e-07, + "loss": 0.1746, + "step": 10688 + }, + { + "epoch": 0.86, + "learning_rate": 1.4768257225927824e-07, + "loss": 0.1857, + "step": 10720 + }, + { + "epoch": 0.86, + "learning_rate": 1.4494612621857364e-07, + "loss": 0.2143, + "step": 10752 + }, + { + "epoch": 0.87, + "learning_rate": 1.4220968017786898e-07, + "loss": 0.1919, + "step": 10784 + }, + { + "epoch": 0.87, + "learning_rate": 1.3947323413716436e-07, + "loss": 0.1885, + "step": 10816 + }, + { + "epoch": 0.87, + "learning_rate": 1.3673678809645973e-07, + "loss": 0.1617, + "step": 10848 + }, + { + "epoch": 0.87, + "learning_rate": 1.3400034205575508e-07, + "loss": 0.1512, + "step": 10880 + }, + { + "epoch": 0.88, + "learning_rate": 1.3126389601505045e-07, + "loss": 0.1666, + "step": 10912 + }, + { + "epoch": 0.88, + "learning_rate": 1.2852744997434582e-07, + "loss": 0.1519, + "step": 10944 + }, + { + "epoch": 0.88, + "learning_rate": 1.2579100393364117e-07, + "loss": 0.1474, + "step": 10976 + }, + { + "epoch": 0.88, + "learning_rate": 1.2305455789293654e-07, + "loss": 0.3074, + "step": 11008 + }, + { + "epoch": 0.89, + "learning_rate": 1.2031811185223191e-07, + "loss": 0.1517, + "step": 11040 + }, + { + "epoch": 0.89, + "learning_rate": 1.1758166581152727e-07, + "loss": 0.3134, + "step": 11072 + }, + { + "epoch": 0.89, + "learning_rate": 1.1484521977082263e-07, + "loss": 0.1734, + "step": 11104 + }, + { + "epoch": 0.9, + "learning_rate": 1.12108773730118e-07, + "loss": 0.24, + "step": 11136 + }, + { + "epoch": 0.9, + "learning_rate": 1.0937232768941338e-07, + "loss": 0.2515, + "step": 11168 + }, + { + "epoch": 0.9, + "learning_rate": 1.0663588164870874e-07, + "loss": 0.2518, + "step": 11200 + }, + { + "epoch": 0.9, + "learning_rate": 1.038994356080041e-07, + "loss": 0.2273, + "step": 11232 + }, + { + "epoch": 0.91, + "learning_rate": 1.0116298956729946e-07, + "loss": 0.2285, + "step": 11264 + }, + { + "epoch": 0.91, + "learning_rate": 9.842654352659482e-08, + "loss": 0.1872, + "step": 11296 + }, + { + "epoch": 0.91, + "learning_rate": 9.56900974858902e-08, + "loss": 0.1507, + "step": 11328 + }, + { + "epoch": 0.91, + "learning_rate": 9.295365144518557e-08, + "loss": 0.2274, + "step": 11360 + }, + { + "epoch": 0.92, + "learning_rate": 9.021720540448093e-08, + "loss": 0.2448, + "step": 11392 + }, + { + "epoch": 0.92, + "learning_rate": 8.748075936377629e-08, + "loss": 0.2097, + "step": 11424 + }, + { + "epoch": 0.92, + "learning_rate": 8.474431332307165e-08, + "loss": 0.1815, + "step": 11456 + }, + { + "epoch": 0.92, + "learning_rate": 8.200786728236703e-08, + "loss": 0.1796, + "step": 11488 + }, + { + "epoch": 0.93, + "learning_rate": 7.927142124166239e-08, + "loss": 0.1981, + "step": 11520 + }, + { + "epoch": 0.93, + "learning_rate": 7.653497520095775e-08, + "loss": 0.168, + "step": 11552 + }, + { + "epoch": 0.93, + "learning_rate": 7.379852916025311e-08, + "loss": 0.152, + "step": 11584 + }, + { + "epoch": 0.93, + "learning_rate": 7.106208311954847e-08, + "loss": 0.1627, + "step": 11616 + }, + { + "epoch": 0.94, + "learning_rate": 6.832563707884386e-08, + "loss": 0.1114, + "step": 11648 + }, + { + "epoch": 0.94, + "learning_rate": 6.558919103813922e-08, + "loss": 0.2017, + "step": 11680 + }, + { + "epoch": 0.94, + "learning_rate": 6.285274499743458e-08, + "loss": 0.3228, + "step": 11712 + }, + { + "epoch": 0.94, + "learning_rate": 6.011629895672994e-08, + "loss": 0.1934, + "step": 11744 + }, + { + "epoch": 0.95, + "learning_rate": 5.737985291602531e-08, + "loss": 0.2293, + "step": 11776 + }, + { + "epoch": 0.95, + "learning_rate": 5.464340687532067e-08, + "loss": 0.1689, + "step": 11808 + }, + { + "epoch": 0.95, + "learning_rate": 5.190696083461604e-08, + "loss": 0.1984, + "step": 11840 + }, + { + "epoch": 0.95, + "learning_rate": 4.91705147939114e-08, + "loss": 0.2479, + "step": 11872 + }, + { + "epoch": 0.96, + "learning_rate": 4.643406875320677e-08, + "loss": 0.1582, + "step": 11904 + }, + { + "epoch": 0.96, + "learning_rate": 4.3697622712502136e-08, + "loss": 0.2271, + "step": 11936 + }, + { + "epoch": 0.96, + "learning_rate": 4.09611766717975e-08, + "loss": 0.1918, + "step": 11968 + }, + { + "epoch": 0.96, + "learning_rate": 3.831024456986488e-08, + "loss": 0.1229, + "step": 12000 + }, + { + "epoch": 0.97, + "learning_rate": 3.557379852916025e-08, + "loss": 0.1692, + "step": 12032 + }, + { + "epoch": 0.97, + "learning_rate": 3.2837352488455616e-08, + "loss": 0.2048, + "step": 12064 + }, + { + "epoch": 0.97, + "learning_rate": 3.010090644775098e-08, + "loss": 0.2802, + "step": 12096 + }, + { + "epoch": 0.97, + "learning_rate": 2.736446040704635e-08, + "loss": 0.1542, + "step": 12128 + }, + { + "epoch": 0.98, + "learning_rate": 2.4628014366341712e-08, + "loss": 0.1748, + "step": 12160 + }, + { + "epoch": 0.98, + "learning_rate": 2.1891568325637078e-08, + "loss": 0.1391, + "step": 12192 + }, + { + "epoch": 0.98, + "learning_rate": 1.915512228493244e-08, + "loss": 0.21, + "step": 12224 + }, + { + "epoch": 0.99, + "learning_rate": 1.6418676244227808e-08, + "loss": 0.1632, + "step": 12256 + }, + { + "epoch": 0.99, + "learning_rate": 1.3682230203523174e-08, + "loss": 0.2148, + "step": 12288 + }, + { + "epoch": 0.99, + "learning_rate": 1.0945784162818539e-08, + "loss": 0.1609, + "step": 12320 + }, + { + "epoch": 0.99, + "learning_rate": 8.209338122113904e-09, + "loss": 0.2158, + "step": 12352 + }, + { + "epoch": 1.0, + "learning_rate": 5.4728920814092695e-09, + "loss": 0.2854, + "step": 12384 + }, + { + "epoch": 1.0, + "learning_rate": 2.7364460407046348e-09, + "loss": 0.2332, + "step": 12416 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 5.2782031577088e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-12430/training_args.bin b/checkpoint-12430/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-12430/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-2486/config.json b/checkpoint-2486/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-2486/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-2486/generation_config.json b/checkpoint-2486/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-2486/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-2486/model-00001-of-00002.safetensors b/checkpoint-2486/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c45907dbb5378d1fdab36ea6bc9cd6d03653726b --- /dev/null +++ b/checkpoint-2486/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5b5cd4e92c4ca2fbc9baa1a4945af97031f91130431638ad40a1714ec3bb21 +size 4992706480 diff --git a/checkpoint-2486/model-00002-of-00002.safetensors b/checkpoint-2486/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e14e54dc0389911f9322f3aa321d8fe2fc49924 --- /dev/null +++ b/checkpoint-2486/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d16b87cd20c1d44823b7898e9bfdd283fb03c515b83651be38123fc9b80465 +size 1180663192 diff --git a/checkpoint-2486/model.safetensors.index.json b/checkpoint-2486/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-2486/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-2486/optimizer.pt b/checkpoint-2486/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fe4062ac3377e2af0f9ebdf66a99ccddc1b2a66 --- /dev/null +++ b/checkpoint-2486/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efbd0884bd23b1cde370826dc6e14f56f54bcabe35739950b6664d802d64c54 +size 3095074288 diff --git a/checkpoint-2486/preprocessor_config.json b/checkpoint-2486/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-2486/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-2486/rng_state.pth b/checkpoint-2486/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a4a1f0a4f6656cf9717031a9be68e189696c7fe --- /dev/null +++ b/checkpoint-2486/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc80761296b6bb99c8df5ed01b6fd5d4143d86e84f2efeb88107b2eee1981469 +size 14244 diff --git a/checkpoint-2486/scheduler.pt b/checkpoint-2486/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac631c245a700b4bb1643a5b621f1ad73733a55 --- /dev/null +++ b/checkpoint-2486/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40a7a944db2c20c81a7aea659ca6dd544ac9a20ffe9620a05ff3249027674fd +size 1064 diff --git a/checkpoint-2486/trainer_state.json b/checkpoint-2486/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6c22f2aadb9cc6e59528eb5cbc8222ff4be7404a --- /dev/null +++ b/checkpoint-2486/trainer_state.json @@ -0,0 +1,483 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.19983922829581993, + "eval_steps": 500, + "global_step": 2486, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 1.05564063154176e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2486/training_args.bin b/checkpoint-2486/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-2486/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-3729/config.json b/checkpoint-3729/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-3729/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-3729/generation_config.json b/checkpoint-3729/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-3729/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-3729/model-00001-of-00002.safetensors b/checkpoint-3729/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd83749412826090f2e6302a422759a16f02a790 --- /dev/null +++ b/checkpoint-3729/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46ed3553eacb1b9780a5a6af5d2ea15d38821b724163f1df8d7116dd9ce1c6c +size 4992706480 diff --git a/checkpoint-3729/model-00002-of-00002.safetensors b/checkpoint-3729/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bbb269706a4f9915ed50e60a8e41ce688da4155 --- /dev/null +++ b/checkpoint-3729/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0adf238584a0a8b67790f24f04dcbb2861816a19bb410166458d2a3fbbcccf65 +size 1180663192 diff --git a/checkpoint-3729/model.safetensors.index.json b/checkpoint-3729/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-3729/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-3729/optimizer.pt b/checkpoint-3729/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c7fcd2dc56dc8803c2106d29773255ff51e8206 --- /dev/null +++ b/checkpoint-3729/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d9d2f6b00536d584bc6d9a89541bedb9489fa757129122b98d9935478d40fa +size 3095074288 diff --git a/checkpoint-3729/preprocessor_config.json b/checkpoint-3729/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-3729/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-3729/rng_state.pth b/checkpoint-3729/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb3cd6ffcf82e6398861946434b8142ec0787f18 --- /dev/null +++ b/checkpoint-3729/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc5d9bb1101ecbe94dd5529a1a23a0d9315fb333ec6b81546268d04e67586b6 +size 14244 diff --git a/checkpoint-3729/scheduler.pt b/checkpoint-3729/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..80eb2fc5e67ee38cae1e6178cd779439ea198e7e --- /dev/null +++ b/checkpoint-3729/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd39e1d9eec68032e6b746da8769db13eba4ee17993a5eec13c7263dc8317c58 +size 1064 diff --git a/checkpoint-3729/trainer_state.json b/checkpoint-3729/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d406cf7abad2a359a9f86577c891d72d54dec16 --- /dev/null +++ b/checkpoint-3729/trainer_state.json @@ -0,0 +1,717 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2997588424437299, + "eval_steps": 500, + "global_step": 3729, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 1.58346094731264e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3729/training_args.bin b/checkpoint-3729/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-3729/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-4972/config.json b/checkpoint-4972/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-4972/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-4972/generation_config.json b/checkpoint-4972/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-4972/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-4972/model-00001-of-00002.safetensors b/checkpoint-4972/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cd8b53498ef69cce9284d819ee3030b71e46069 --- /dev/null +++ b/checkpoint-4972/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931afef289ee639a2f9a690eaaec21e862e5799cb0a215ec4ce745c99092e0bb +size 4992706480 diff --git a/checkpoint-4972/model-00002-of-00002.safetensors b/checkpoint-4972/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0096efedb42388108e0550116971f75d4c7bb4cc --- /dev/null +++ b/checkpoint-4972/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f76ced93a5e0d399c243be27f017def3cdbee0ab13837892213229daacbe8c1 +size 1180663192 diff --git a/checkpoint-4972/model.safetensors.index.json b/checkpoint-4972/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-4972/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-4972/optimizer.pt b/checkpoint-4972/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e86380e46a329f000dc24530402aedeb5c9561c5 --- /dev/null +++ b/checkpoint-4972/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15aca513d05f4bb6f2ab24056275c30dc59f3d4e8b909a9df091b373f193dcbb +size 3095074288 diff --git a/checkpoint-4972/preprocessor_config.json b/checkpoint-4972/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-4972/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-4972/rng_state.pth b/checkpoint-4972/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e44b1c08603960c66960728320cb437de82e09a9 --- /dev/null +++ b/checkpoint-4972/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b88d995b9f70d58057435c82e33a340ab58d83a8938273656b6164c34d5868 +size 14244 diff --git a/checkpoint-4972/scheduler.pt b/checkpoint-4972/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..822d0e573a527a99f6ec8cf7af1b97a8f4f1c332 --- /dev/null +++ b/checkpoint-4972/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492288ed0e7dc4ba28e17a0886c8ff8c8cd36a2d40941716e1a71181f8a684bf +size 1064 diff --git a/checkpoint-4972/trainer_state.json b/checkpoint-4972/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..36f08d6cee58ce031be96001f66d2fe13691c03b --- /dev/null +++ b/checkpoint-4972/trainer_state.json @@ -0,0 +1,951 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.39967845659163986, + "eval_steps": 500, + "global_step": 4972, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 2.11128126308352e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4972/training_args.bin b/checkpoint-4972/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-4972/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-6215/config.json b/checkpoint-6215/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-6215/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-6215/generation_config.json b/checkpoint-6215/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-6215/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-6215/model-00001-of-00002.safetensors b/checkpoint-6215/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..325480dd73ca8f74ad77edc5ae33d19e288182cc --- /dev/null +++ b/checkpoint-6215/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2306c42eee2f00a9811bb87d30fe178bcffc3f6b4a7d2ae3fb0d959dc4da1984 +size 4992706480 diff --git a/checkpoint-6215/model-00002-of-00002.safetensors b/checkpoint-6215/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..635e6e8665e3e4461b7a90d61ebadf01f697cc77 --- /dev/null +++ b/checkpoint-6215/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5250af5532f1c0a83d5a7998763fb53282423031abd7495d190540ba6e6d6756 +size 1180663192 diff --git a/checkpoint-6215/model.safetensors.index.json b/checkpoint-6215/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-6215/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-6215/optimizer.pt b/checkpoint-6215/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a70fcc84b121491bc74ddf12a037987b4e60a0bd --- /dev/null +++ b/checkpoint-6215/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b48f56f3f8b014ef7e1fbee244842193553244afa000ac661a63d01db1b97c7 +size 3095074288 diff --git a/checkpoint-6215/preprocessor_config.json b/checkpoint-6215/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-6215/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-6215/rng_state.pth b/checkpoint-6215/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..366f170944b2d4624cc2d3b08e5ea4f1fb757c81 --- /dev/null +++ b/checkpoint-6215/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f570cdc536c19488417eacf39c25265dc84b2b767784c0833be4d7b4a411f207 +size 14244 diff --git a/checkpoint-6215/scheduler.pt b/checkpoint-6215/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f021011627d71fb3e25dd1ac1dd48ffe89caf00d --- /dev/null +++ b/checkpoint-6215/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61dd13e07c593dc337382eca83adc174b876e952f29e523872440d25e1355237 +size 1064 diff --git a/checkpoint-6215/trainer_state.json b/checkpoint-6215/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7bf02f53aed535267a0cdf86fa1028cd2e102763 --- /dev/null +++ b/checkpoint-6215/trainer_state.json @@ -0,0 +1,1185 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4995980707395498, + "eval_steps": 500, + "global_step": 6215, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 2.6391015788544e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6215/training_args.bin b/checkpoint-6215/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-6215/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-7458/config.json b/checkpoint-7458/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-7458/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-7458/generation_config.json b/checkpoint-7458/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-7458/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-7458/model-00001-of-00002.safetensors b/checkpoint-7458/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..187021ffe7588b321fb77c9fb2bf52e7d9d00123 --- /dev/null +++ b/checkpoint-7458/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f17a001f75226b107993fa146968f517a9b2e9e233f514adbe919e1a83594d8 +size 4992706480 diff --git a/checkpoint-7458/model-00002-of-00002.safetensors b/checkpoint-7458/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05eb5798a9cbb10ce4fc696b7921c9850dbfd9cc --- /dev/null +++ b/checkpoint-7458/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f60f80d7ab13887bf379c6f956e7f17aaaddb17e9785f68b30b6a572a8f3c6c +size 1180663192 diff --git a/checkpoint-7458/model.safetensors.index.json b/checkpoint-7458/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-7458/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-7458/optimizer.pt b/checkpoint-7458/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf502cb334ea78c2044bc6160cdefeb4921b8783 --- /dev/null +++ b/checkpoint-7458/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abda99aab5bc7dcb9cbb086c542abafea6b76a4fd5522d53bebf932b53893d4c +size 3095074288 diff --git a/checkpoint-7458/preprocessor_config.json b/checkpoint-7458/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-7458/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-7458/rng_state.pth b/checkpoint-7458/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cabb9a123375c164bf284479555e62d742a26562 --- /dev/null +++ b/checkpoint-7458/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d51e5667553a42c75f9615b9256e4e25b3207f5b91bc8419a768b3ba1b7d9f14 +size 14244 diff --git a/checkpoint-7458/scheduler.pt b/checkpoint-7458/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5284da9b3a9ae70e1150268093cd59c92019f7a --- /dev/null +++ b/checkpoint-7458/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1615612216ce089452949985ecd61c14aa647a39e7a2b090fd4e383034ccfe67 +size 1064 diff --git a/checkpoint-7458/trainer_state.json b/checkpoint-7458/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..57db6794ce347320a8e8055b024e5259659e1f7e --- /dev/null +++ b/checkpoint-7458/trainer_state.json @@ -0,0 +1,1419 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5995176848874598, + "eval_steps": 500, + "global_step": 7458, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + }, + { + "epoch": 0.5, + "learning_rate": 5.307850179579271e-07, + "loss": 0.1467, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 5.280485719172225e-07, + "loss": 0.2702, + "step": 6272 + }, + { + "epoch": 0.51, + "learning_rate": 5.253121258765179e-07, + "loss": 0.1736, + "step": 6304 + }, + { + "epoch": 0.51, + "learning_rate": 5.225756798358132e-07, + "loss": 0.2281, + "step": 6336 + }, + { + "epoch": 0.51, + "learning_rate": 5.198392337951087e-07, + "loss": 0.2114, + "step": 6368 + }, + { + "epoch": 0.51, + "learning_rate": 5.17102787754404e-07, + "loss": 0.2252, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 5.143663417136994e-07, + "loss": 0.163, + "step": 6432 + }, + { + "epoch": 0.52, + "learning_rate": 5.116298956729946e-07, + "loss": 0.266, + "step": 6464 + }, + { + "epoch": 0.52, + "learning_rate": 5.0889344963229e-07, + "loss": 0.2154, + "step": 6496 + }, + { + "epoch": 0.52, + "learning_rate": 5.061570035915854e-07, + "loss": 0.2003, + "step": 6528 + }, + { + "epoch": 0.53, + "learning_rate": 5.034205575508807e-07, + "loss": 0.1969, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 5.006841115101761e-07, + "loss": 0.2436, + "step": 6592 + }, + { + "epoch": 0.53, + "learning_rate": 4.979476654694714e-07, + "loss": 0.134, + "step": 6624 + }, + { + "epoch": 0.54, + "learning_rate": 4.952112194287669e-07, + "loss": 0.2246, + "step": 6656 + }, + { + "epoch": 0.54, + "learning_rate": 4.924747733880622e-07, + "loss": 0.1873, + "step": 6688 + }, + { + "epoch": 0.54, + "learning_rate": 4.897383273473576e-07, + "loss": 0.1549, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.87001881306653e-07, + "loss": 0.2518, + "step": 6752 + }, + { + "epoch": 0.55, + "learning_rate": 4.842654352659483e-07, + "loss": 0.2676, + "step": 6784 + }, + { + "epoch": 0.55, + "learning_rate": 4.815289892252437e-07, + "loss": 0.1537, + "step": 6816 + }, + { + "epoch": 0.55, + "learning_rate": 4.787925431845391e-07, + "loss": 0.1925, + "step": 6848 + }, + { + "epoch": 0.55, + "learning_rate": 4.760560971438344e-07, + "loss": 0.2157, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.7331965110312977e-07, + "loss": 0.1923, + "step": 6912 + }, + { + "epoch": 0.56, + "learning_rate": 4.7058320506242517e-07, + "loss": 0.1922, + "step": 6944 + }, + { + "epoch": 0.56, + "learning_rate": 4.678467590217205e-07, + "loss": 0.257, + "step": 6976 + }, + { + "epoch": 0.56, + "learning_rate": 4.6511031298101586e-07, + "loss": 0.1744, + "step": 7008 + }, + { + "epoch": 0.57, + "learning_rate": 4.6237386694031126e-07, + "loss": 0.2281, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.596374208996066e-07, + "loss": 0.2135, + "step": 7072 + }, + { + "epoch": 0.57, + "learning_rate": 4.56900974858902e-07, + "loss": 0.1841, + "step": 7104 + }, + { + "epoch": 0.57, + "learning_rate": 4.5416452881819735e-07, + "loss": 0.2652, + "step": 7136 + }, + { + "epoch": 0.58, + "learning_rate": 4.5142808277749275e-07, + "loss": 0.225, + "step": 7168 + }, + { + "epoch": 0.58, + "learning_rate": 4.4869163673678804e-07, + "loss": 0.2377, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4595519069608344e-07, + "loss": 0.2323, + "step": 7232 + }, + { + "epoch": 0.58, + "learning_rate": 4.432187446553788e-07, + "loss": 0.2227, + "step": 7264 + }, + { + "epoch": 0.59, + "learning_rate": 4.404822986146742e-07, + "loss": 0.2968, + "step": 7296 + }, + { + "epoch": 0.59, + "learning_rate": 4.377458525739695e-07, + "loss": 0.2623, + "step": 7328 + }, + { + "epoch": 0.59, + "learning_rate": 4.350094065332649e-07, + "loss": 0.1943, + "step": 7360 + }, + { + "epoch": 0.59, + "learning_rate": 4.322729604925603e-07, + "loss": 0.158, + "step": 7392 + }, + { + "epoch": 0.6, + "learning_rate": 4.2953651445185563e-07, + "loss": 0.2413, + "step": 7424 + }, + { + "epoch": 0.6, + "learning_rate": 4.2680006841115103e-07, + "loss": 0.2413, + "step": 7456 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 3.16692189462528e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7458/training_args.bin b/checkpoint-7458/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-7458/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-8701/config.json b/checkpoint-8701/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-8701/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-8701/generation_config.json b/checkpoint-8701/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-8701/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-8701/model-00001-of-00002.safetensors b/checkpoint-8701/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61415911782474726671835392699d0d2910dd29 --- /dev/null +++ b/checkpoint-8701/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73184a49035d5a87728d9619c1e990704bd1b2247b6cf99df51ce3cbed83dcf9 +size 4992706480 diff --git a/checkpoint-8701/model-00002-of-00002.safetensors b/checkpoint-8701/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..85a283a7426d0bd0e9a692bc9c046aa231fe14c5 --- /dev/null +++ b/checkpoint-8701/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b18d724b67bd0952041cb4ac368412dc42cdb9a19b745c8dc961b43f7c1610 +size 1180663192 diff --git a/checkpoint-8701/model.safetensors.index.json b/checkpoint-8701/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-8701/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-8701/optimizer.pt b/checkpoint-8701/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..83a422af02500665aaf184a1a8dc52111a4b6ac4 --- /dev/null +++ b/checkpoint-8701/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cafce615ac1ef1ccb511fb254327253b30ec712dfeccfb739bbae10c2fa58db8 +size 3095074288 diff --git a/checkpoint-8701/preprocessor_config.json b/checkpoint-8701/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-8701/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-8701/rng_state.pth b/checkpoint-8701/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..216a7af81b78533b10125b45e20bf1c848acf27d --- /dev/null +++ b/checkpoint-8701/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da8dc56ceccfb12d1ef0c0c300fe03a5cf36c33bdd9f0ca08cfe38a6eded94ca +size 14244 diff --git a/checkpoint-8701/scheduler.pt b/checkpoint-8701/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76cde69a35b458cb9b7d6f393e66b7c2d400b18a --- /dev/null +++ b/checkpoint-8701/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85534785beed4d8223605974f3fa791b41cc9db1590b777cb5b0cbff464cac25 +size 1064 diff --git a/checkpoint-8701/trainer_state.json b/checkpoint-8701/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4a7ff8f95aa03e117bfc262b63b4f386c228d9ae --- /dev/null +++ b/checkpoint-8701/trainer_state.json @@ -0,0 +1,1647 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6994372990353698, + "eval_steps": 500, + "global_step": 8701, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + }, + { + "epoch": 0.5, + "learning_rate": 5.307850179579271e-07, + "loss": 0.1467, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 5.280485719172225e-07, + "loss": 0.2702, + "step": 6272 + }, + { + "epoch": 0.51, + "learning_rate": 5.253121258765179e-07, + "loss": 0.1736, + "step": 6304 + }, + { + "epoch": 0.51, + "learning_rate": 5.225756798358132e-07, + "loss": 0.2281, + "step": 6336 + }, + { + "epoch": 0.51, + "learning_rate": 5.198392337951087e-07, + "loss": 0.2114, + "step": 6368 + }, + { + "epoch": 0.51, + "learning_rate": 5.17102787754404e-07, + "loss": 0.2252, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 5.143663417136994e-07, + "loss": 0.163, + "step": 6432 + }, + { + "epoch": 0.52, + "learning_rate": 5.116298956729946e-07, + "loss": 0.266, + "step": 6464 + }, + { + "epoch": 0.52, + "learning_rate": 5.0889344963229e-07, + "loss": 0.2154, + "step": 6496 + }, + { + "epoch": 0.52, + "learning_rate": 5.061570035915854e-07, + "loss": 0.2003, + "step": 6528 + }, + { + "epoch": 0.53, + "learning_rate": 5.034205575508807e-07, + "loss": 0.1969, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 5.006841115101761e-07, + "loss": 0.2436, + "step": 6592 + }, + { + "epoch": 0.53, + "learning_rate": 4.979476654694714e-07, + "loss": 0.134, + "step": 6624 + }, + { + "epoch": 0.54, + "learning_rate": 4.952112194287669e-07, + "loss": 0.2246, + "step": 6656 + }, + { + "epoch": 0.54, + "learning_rate": 4.924747733880622e-07, + "loss": 0.1873, + "step": 6688 + }, + { + "epoch": 0.54, + "learning_rate": 4.897383273473576e-07, + "loss": 0.1549, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.87001881306653e-07, + "loss": 0.2518, + "step": 6752 + }, + { + "epoch": 0.55, + "learning_rate": 4.842654352659483e-07, + "loss": 0.2676, + "step": 6784 + }, + { + "epoch": 0.55, + "learning_rate": 4.815289892252437e-07, + "loss": 0.1537, + "step": 6816 + }, + { + "epoch": 0.55, + "learning_rate": 4.787925431845391e-07, + "loss": 0.1925, + "step": 6848 + }, + { + "epoch": 0.55, + "learning_rate": 4.760560971438344e-07, + "loss": 0.2157, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.7331965110312977e-07, + "loss": 0.1923, + "step": 6912 + }, + { + "epoch": 0.56, + "learning_rate": 4.7058320506242517e-07, + "loss": 0.1922, + "step": 6944 + }, + { + "epoch": 0.56, + "learning_rate": 4.678467590217205e-07, + "loss": 0.257, + "step": 6976 + }, + { + "epoch": 0.56, + "learning_rate": 4.6511031298101586e-07, + "loss": 0.1744, + "step": 7008 + }, + { + "epoch": 0.57, + "learning_rate": 4.6237386694031126e-07, + "loss": 0.2281, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.596374208996066e-07, + "loss": 0.2135, + "step": 7072 + }, + { + "epoch": 0.57, + "learning_rate": 4.56900974858902e-07, + "loss": 0.1841, + "step": 7104 + }, + { + "epoch": 0.57, + "learning_rate": 4.5416452881819735e-07, + "loss": 0.2652, + "step": 7136 + }, + { + "epoch": 0.58, + "learning_rate": 4.5142808277749275e-07, + "loss": 0.225, + "step": 7168 + }, + { + "epoch": 0.58, + "learning_rate": 4.4869163673678804e-07, + "loss": 0.2377, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4595519069608344e-07, + "loss": 0.2323, + "step": 7232 + }, + { + "epoch": 0.58, + "learning_rate": 4.432187446553788e-07, + "loss": 0.2227, + "step": 7264 + }, + { + "epoch": 0.59, + "learning_rate": 4.404822986146742e-07, + "loss": 0.2968, + "step": 7296 + }, + { + "epoch": 0.59, + "learning_rate": 4.377458525739695e-07, + "loss": 0.2623, + "step": 7328 + }, + { + "epoch": 0.59, + "learning_rate": 4.350094065332649e-07, + "loss": 0.1943, + "step": 7360 + }, + { + "epoch": 0.59, + "learning_rate": 4.322729604925603e-07, + "loss": 0.158, + "step": 7392 + }, + { + "epoch": 0.6, + "learning_rate": 4.2953651445185563e-07, + "loss": 0.2413, + "step": 7424 + }, + { + "epoch": 0.6, + "learning_rate": 4.2680006841115103e-07, + "loss": 0.2413, + "step": 7456 + }, + { + "epoch": 0.6, + "learning_rate": 4.240636223704464e-07, + "loss": 0.1793, + "step": 7488 + }, + { + "epoch": 0.6, + "learning_rate": 4.213271763297417e-07, + "loss": 0.2505, + "step": 7520 + }, + { + "epoch": 0.61, + "learning_rate": 4.1859073028903707e-07, + "loss": 0.2078, + "step": 7552 + }, + { + "epoch": 0.61, + "learning_rate": 4.1585428424833247e-07, + "loss": 0.2603, + "step": 7584 + }, + { + "epoch": 0.61, + "learning_rate": 4.131178382076278e-07, + "loss": 0.1426, + "step": 7616 + }, + { + "epoch": 0.61, + "learning_rate": 4.103813921669232e-07, + "loss": 0.1823, + "step": 7648 + }, + { + "epoch": 0.62, + "learning_rate": 4.0764494612621856e-07, + "loss": 0.2215, + "step": 7680 + }, + { + "epoch": 0.62, + "learning_rate": 4.049085000855139e-07, + "loss": 0.2956, + "step": 7712 + }, + { + "epoch": 0.62, + "learning_rate": 4.021720540448093e-07, + "loss": 0.1766, + "step": 7744 + }, + { + "epoch": 0.63, + "learning_rate": 3.9943560800410465e-07, + "loss": 0.2124, + "step": 7776 + }, + { + "epoch": 0.63, + "learning_rate": 3.9669916196340005e-07, + "loss": 0.2123, + "step": 7808 + }, + { + "epoch": 0.63, + "learning_rate": 3.9396271592269535e-07, + "loss": 0.2172, + "step": 7840 + }, + { + "epoch": 0.63, + "learning_rate": 3.9122626988199075e-07, + "loss": 0.1737, + "step": 7872 + }, + { + "epoch": 0.64, + "learning_rate": 3.884898238412861e-07, + "loss": 0.2504, + "step": 7904 + }, + { + "epoch": 0.64, + "learning_rate": 3.857533778005815e-07, + "loss": 0.2242, + "step": 7936 + }, + { + "epoch": 0.64, + "learning_rate": 3.8301693175987684e-07, + "loss": 0.205, + "step": 7968 + }, + { + "epoch": 0.64, + "learning_rate": 3.802804857191722e-07, + "loss": 0.2286, + "step": 8000 + }, + { + "epoch": 0.65, + "learning_rate": 3.775440396784676e-07, + "loss": 0.1848, + "step": 8032 + }, + { + "epoch": 0.65, + "learning_rate": 3.7480759363776293e-07, + "loss": 0.2065, + "step": 8064 + }, + { + "epoch": 0.65, + "learning_rate": 3.7207114759705833e-07, + "loss": 0.1688, + "step": 8096 + }, + { + "epoch": 0.65, + "learning_rate": 3.693347015563537e-07, + "loss": 0.2489, + "step": 8128 + }, + { + "epoch": 0.66, + "learning_rate": 3.66598255515649e-07, + "loss": 0.2309, + "step": 8160 + }, + { + "epoch": 0.66, + "learning_rate": 3.6386180947494437e-07, + "loss": 0.1236, + "step": 8192 + }, + { + "epoch": 0.66, + "learning_rate": 3.6112536343423977e-07, + "loss": 0.2242, + "step": 8224 + }, + { + "epoch": 0.66, + "learning_rate": 3.583889173935351e-07, + "loss": 0.221, + "step": 8256 + }, + { + "epoch": 0.67, + "learning_rate": 3.556524713528305e-07, + "loss": 0.1663, + "step": 8288 + }, + { + "epoch": 0.67, + "learning_rate": 3.5291602531212586e-07, + "loss": 0.2063, + "step": 8320 + }, + { + "epoch": 0.67, + "learning_rate": 3.501795792714212e-07, + "loss": 0.2651, + "step": 8352 + }, + { + "epoch": 0.67, + "learning_rate": 3.474431332307166e-07, + "loss": 0.2451, + "step": 8384 + }, + { + "epoch": 0.68, + "learning_rate": 3.4470668719001196e-07, + "loss": 0.1645, + "step": 8416 + }, + { + "epoch": 0.68, + "learning_rate": 3.4197024114930736e-07, + "loss": 0.1446, + "step": 8448 + }, + { + "epoch": 0.68, + "learning_rate": 3.3923379510860265e-07, + "loss": 0.2006, + "step": 8480 + }, + { + "epoch": 0.68, + "learning_rate": 3.3649734906789805e-07, + "loss": 0.1702, + "step": 8512 + }, + { + "epoch": 0.69, + "learning_rate": 3.337609030271934e-07, + "loss": 0.2685, + "step": 8544 + }, + { + "epoch": 0.69, + "learning_rate": 3.310244569864888e-07, + "loss": 0.1944, + "step": 8576 + }, + { + "epoch": 0.69, + "learning_rate": 3.2828801094578414e-07, + "loss": 0.1461, + "step": 8608 + }, + { + "epoch": 0.69, + "learning_rate": 3.255515649050795e-07, + "loss": 0.2539, + "step": 8640 + }, + { + "epoch": 0.7, + "learning_rate": 3.228151188643749e-07, + "loss": 0.1783, + "step": 8672 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 3.69474221039616e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8701/training_args.bin b/checkpoint-8701/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-8701/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/checkpoint-9944/config.json b/checkpoint-9944/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/checkpoint-9944/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/checkpoint-9944/generation_config.json b/checkpoint-9944/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/checkpoint-9944/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/checkpoint-9944/model-00001-of-00002.safetensors b/checkpoint-9944/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d7804cdb3f49fbd1839a53838c13769a016681a --- /dev/null +++ b/checkpoint-9944/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386528b9c8c91441742bf812feb7846446a91581c25c1f8cc8094f19cc9ed2fb +size 4992706480 diff --git a/checkpoint-9944/model-00002-of-00002.safetensors b/checkpoint-9944/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..025cadeca791dcf10cc55bfef2cd3cdaad99ca63 --- /dev/null +++ b/checkpoint-9944/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a77d69ed0034a2eb721669b105d8b3fc2b6f495b509c7dd333588ad62be578f4 +size 1180663192 diff --git a/checkpoint-9944/model.safetensors.index.json b/checkpoint-9944/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/checkpoint-9944/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-9944/optimizer.pt b/checkpoint-9944/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..45e5c5a223bccd96c8e581634136df0c87dc71f3 --- /dev/null +++ b/checkpoint-9944/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9221534c50b9267921cd94dbd180c68797fd7cc061ac4daeed0060a4845f33dc +size 3095074288 diff --git a/checkpoint-9944/preprocessor_config.json b/checkpoint-9944/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/checkpoint-9944/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-9944/rng_state.pth b/checkpoint-9944/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fcfe210e19ea8cc4cbfaee6c502d014b63ff2fed --- /dev/null +++ b/checkpoint-9944/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d6337b77af02591fd224404f5af89818dc0fe425076d46d8598a7768792675 +size 14244 diff --git a/checkpoint-9944/scheduler.pt b/checkpoint-9944/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..caf8017922af55a2dfa32d3fc12b4a22ccd8bdaa --- /dev/null +++ b/checkpoint-9944/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2c677ba0e0753b0abca7ba4b74a70d6496ca6bca23f5709a6af4bef56e6568b +size 1064 diff --git a/checkpoint-9944/trainer_state.json b/checkpoint-9944/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f451bc46f42283c930f544756a9db64248d8a14 --- /dev/null +++ b/checkpoint-9944/trainer_state.json @@ -0,0 +1,1881 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7993569131832797, + "eval_steps": 500, + "global_step": 9944, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.753351206434316e-08, + "loss": 4.9531, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 8.042895442359249e-08, + "loss": 4.9011, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 1.2198391420911528e-07, + "loss": 4.472, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 1.648793565683646e-07, + "loss": 3.9678, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 2.0777479892761392e-07, + "loss": 3.3412, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 2.5067024128686325e-07, + "loss": 2.5657, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 2.9356568364611256e-07, + "loss": 1.9677, + "step": 224 + }, + { + "epoch": 0.02, + "learning_rate": 3.364611260053619e-07, + "loss": 1.6646, + "step": 256 + }, + { + "epoch": 0.02, + "learning_rate": 3.7935656836461123e-07, + "loss": 1.5032, + "step": 288 + }, + { + "epoch": 0.03, + "learning_rate": 4.222520107238606e-07, + "loss": 1.3443, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 4.651474530831099e-07, + "loss": 1.2277, + "step": 352 + }, + { + "epoch": 0.03, + "learning_rate": 5.080428954423593e-07, + "loss": 1.0104, + "step": 384 + }, + { + "epoch": 0.03, + "learning_rate": 5.509383378016086e-07, + "loss": 0.7317, + "step": 416 + }, + { + "epoch": 0.04, + "learning_rate": 5.938337801608579e-07, + "loss": 0.3644, + "step": 448 + }, + { + "epoch": 0.04, + "learning_rate": 6.367292225201072e-07, + "loss": 0.388, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 6.796246648793566e-07, + "loss": 0.264, + "step": 512 + }, + { + "epoch": 0.04, + "learning_rate": 7.225201072386059e-07, + "loss": 0.2479, + "step": 544 + }, + { + "epoch": 0.05, + "learning_rate": 7.654155495978551e-07, + "loss": 0.403, + "step": 576 + }, + { + "epoch": 0.05, + "learning_rate": 8.083109919571045e-07, + "loss": 0.3062, + "step": 608 + }, + { + "epoch": 0.05, + "learning_rate": 8.512064343163538e-07, + "loss": 0.3016, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 8.941018766756032e-07, + "loss": 0.3445, + "step": 672 + }, + { + "epoch": 0.06, + "learning_rate": 9.369973190348524e-07, + "loss": 0.2554, + "step": 704 + }, + { + "epoch": 0.06, + "learning_rate": 9.798927613941018e-07, + "loss": 0.2762, + "step": 736 + }, + { + "epoch": 0.06, + "learning_rate": 9.985462630408755e-07, + "loss": 0.2265, + "step": 768 + }, + { + "epoch": 0.06, + "learning_rate": 9.958098170001709e-07, + "loss": 0.2158, + "step": 800 + }, + { + "epoch": 0.07, + "learning_rate": 9.930733709594662e-07, + "loss": 0.3106, + "step": 832 + }, + { + "epoch": 0.07, + "learning_rate": 9.903369249187618e-07, + "loss": 0.3085, + "step": 864 + }, + { + "epoch": 0.07, + "learning_rate": 9.876004788780571e-07, + "loss": 0.2633, + "step": 896 + }, + { + "epoch": 0.07, + "learning_rate": 9.848640328373525e-07, + "loss": 0.2145, + "step": 928 + }, + { + "epoch": 0.08, + "learning_rate": 9.821275867966478e-07, + "loss": 0.2594, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 9.793911407559432e-07, + "loss": 0.2264, + "step": 992 + }, + { + "epoch": 0.08, + "learning_rate": 9.766546947152385e-07, + "loss": 0.2512, + "step": 1024 + }, + { + "epoch": 0.08, + "learning_rate": 9.739182486745339e-07, + "loss": 0.3154, + "step": 1056 + }, + { + "epoch": 0.09, + "learning_rate": 9.711818026338292e-07, + "loss": 0.2672, + "step": 1088 + }, + { + "epoch": 0.09, + "learning_rate": 9.684453565931246e-07, + "loss": 0.2502, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 9.6570891055242e-07, + "loss": 0.2702, + "step": 1152 + }, + { + "epoch": 0.1, + "learning_rate": 9.629724645117153e-07, + "loss": 0.2919, + "step": 1184 + }, + { + "epoch": 0.1, + "learning_rate": 9.602360184710108e-07, + "loss": 0.3925, + "step": 1216 + }, + { + "epoch": 0.1, + "learning_rate": 9.574995724303062e-07, + "loss": 0.285, + "step": 1248 + }, + { + "epoch": 0.1, + "learning_rate": 9.547631263896015e-07, + "loss": 0.3084, + "step": 1280 + }, + { + "epoch": 0.11, + "learning_rate": 9.520266803488969e-07, + "loss": 0.2275, + "step": 1312 + }, + { + "epoch": 0.11, + "learning_rate": 9.492902343081922e-07, + "loss": 0.245, + "step": 1344 + }, + { + "epoch": 0.11, + "learning_rate": 9.465537882674875e-07, + "loss": 0.233, + "step": 1376 + }, + { + "epoch": 0.11, + "learning_rate": 9.438173422267829e-07, + "loss": 0.2825, + "step": 1408 + }, + { + "epoch": 0.12, + "learning_rate": 9.410808961860783e-07, + "loss": 0.231, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 9.383444501453737e-07, + "loss": 0.2449, + "step": 1472 + }, + { + "epoch": 0.12, + "learning_rate": 9.35608004104669e-07, + "loss": 0.2732, + "step": 1504 + }, + { + "epoch": 0.12, + "learning_rate": 9.328715580639644e-07, + "loss": 0.2031, + "step": 1536 + }, + { + "epoch": 0.13, + "learning_rate": 9.301351120232597e-07, + "loss": 0.1749, + "step": 1568 + }, + { + "epoch": 0.13, + "learning_rate": 9.273986659825551e-07, + "loss": 0.1722, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 9.246622199418504e-07, + "loss": 0.2743, + "step": 1632 + }, + { + "epoch": 0.13, + "learning_rate": 9.219257739011459e-07, + "loss": 0.2907, + "step": 1664 + }, + { + "epoch": 0.14, + "learning_rate": 9.192748417992132e-07, + "loss": 0.2664, + "step": 1696 + }, + { + "epoch": 0.14, + "learning_rate": 9.165383957585086e-07, + "loss": 0.2085, + "step": 1728 + }, + { + "epoch": 0.14, + "learning_rate": 9.13801949717804e-07, + "loss": 0.1839, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 9.110655036770994e-07, + "loss": 0.2667, + "step": 1792 + }, + { + "epoch": 0.15, + "learning_rate": 9.083290576363947e-07, + "loss": 0.1994, + "step": 1824 + }, + { + "epoch": 0.15, + "learning_rate": 9.0559261159569e-07, + "loss": 0.2568, + "step": 1856 + }, + { + "epoch": 0.15, + "learning_rate": 9.028561655549855e-07, + "loss": 0.2909, + "step": 1888 + }, + { + "epoch": 0.15, + "learning_rate": 9.001197195142807e-07, + "loss": 0.2697, + "step": 1920 + }, + { + "epoch": 0.16, + "learning_rate": 8.973832734735761e-07, + "loss": 0.3379, + "step": 1952 + }, + { + "epoch": 0.16, + "learning_rate": 8.946468274328715e-07, + "loss": 0.2866, + "step": 1984 + }, + { + "epoch": 0.16, + "learning_rate": 8.919103813921669e-07, + "loss": 0.2634, + "step": 2016 + }, + { + "epoch": 0.16, + "learning_rate": 8.891739353514622e-07, + "loss": 0.2234, + "step": 2048 + }, + { + "epoch": 0.17, + "learning_rate": 8.864374893107576e-07, + "loss": 0.2541, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 8.83701043270053e-07, + "loss": 0.2341, + "step": 2112 + }, + { + "epoch": 0.17, + "learning_rate": 8.809645972293484e-07, + "loss": 0.2602, + "step": 2144 + }, + { + "epoch": 0.17, + "learning_rate": 8.782281511886437e-07, + "loss": 0.2602, + "step": 2176 + }, + { + "epoch": 0.18, + "learning_rate": 8.75491705147939e-07, + "loss": 0.2036, + "step": 2208 + }, + { + "epoch": 0.18, + "learning_rate": 8.727552591072344e-07, + "loss": 0.2342, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 8.700188130665298e-07, + "loss": 0.2361, + "step": 2272 + }, + { + "epoch": 0.19, + "learning_rate": 8.672823670258251e-07, + "loss": 0.3299, + "step": 2304 + }, + { + "epoch": 0.19, + "learning_rate": 8.645459209851206e-07, + "loss": 0.3221, + "step": 2336 + }, + { + "epoch": 0.19, + "learning_rate": 8.618094749444159e-07, + "loss": 0.2119, + "step": 2368 + }, + { + "epoch": 0.19, + "learning_rate": 8.590730289037113e-07, + "loss": 0.1908, + "step": 2400 + }, + { + "epoch": 0.2, + "learning_rate": 8.563365828630066e-07, + "loss": 0.2736, + "step": 2432 + }, + { + "epoch": 0.2, + "learning_rate": 8.536001368223021e-07, + "loss": 0.1713, + "step": 2464 + }, + { + "epoch": 0.2, + "learning_rate": 8.508636907815974e-07, + "loss": 0.2658, + "step": 2496 + }, + { + "epoch": 0.2, + "learning_rate": 8.481272447408928e-07, + "loss": 0.2235, + "step": 2528 + }, + { + "epoch": 0.21, + "learning_rate": 8.453907987001881e-07, + "loss": 0.1858, + "step": 2560 + }, + { + "epoch": 0.21, + "learning_rate": 8.426543526594834e-07, + "loss": 0.2935, + "step": 2592 + }, + { + "epoch": 0.21, + "learning_rate": 8.399179066187788e-07, + "loss": 0.1996, + "step": 2624 + }, + { + "epoch": 0.21, + "learning_rate": 8.371814605780741e-07, + "loss": 0.2209, + "step": 2656 + }, + { + "epoch": 0.22, + "learning_rate": 8.344450145373696e-07, + "loss": 0.1611, + "step": 2688 + }, + { + "epoch": 0.22, + "learning_rate": 8.317085684966649e-07, + "loss": 0.28, + "step": 2720 + }, + { + "epoch": 0.22, + "learning_rate": 8.289721224559603e-07, + "loss": 0.2486, + "step": 2752 + }, + { + "epoch": 0.22, + "learning_rate": 8.262356764152556e-07, + "loss": 0.1978, + "step": 2784 + }, + { + "epoch": 0.23, + "learning_rate": 8.234992303745511e-07, + "loss": 0.2535, + "step": 2816 + }, + { + "epoch": 0.23, + "learning_rate": 8.207627843338464e-07, + "loss": 0.2666, + "step": 2848 + }, + { + "epoch": 0.23, + "learning_rate": 8.180263382931417e-07, + "loss": 0.1769, + "step": 2880 + }, + { + "epoch": 0.23, + "learning_rate": 8.152898922524371e-07, + "loss": 0.2803, + "step": 2912 + }, + { + "epoch": 0.24, + "learning_rate": 8.125534462117325e-07, + "loss": 0.2129, + "step": 2944 + }, + { + "epoch": 0.24, + "learning_rate": 8.098170001710278e-07, + "loss": 0.2255, + "step": 2976 + }, + { + "epoch": 0.24, + "learning_rate": 8.070805541303232e-07, + "loss": 0.1739, + "step": 3008 + }, + { + "epoch": 0.24, + "learning_rate": 8.043441080896186e-07, + "loss": 0.2321, + "step": 3040 + }, + { + "epoch": 0.25, + "learning_rate": 8.01607662048914e-07, + "loss": 0.2761, + "step": 3072 + }, + { + "epoch": 0.25, + "learning_rate": 7.988712160082093e-07, + "loss": 0.2867, + "step": 3104 + }, + { + "epoch": 0.25, + "learning_rate": 7.961347699675047e-07, + "loss": 0.1763, + "step": 3136 + }, + { + "epoch": 0.25, + "learning_rate": 7.933983239268001e-07, + "loss": 0.325, + "step": 3168 + }, + { + "epoch": 0.26, + "learning_rate": 7.906618778860953e-07, + "loss": 0.2515, + "step": 3200 + }, + { + "epoch": 0.26, + "learning_rate": 7.879254318453907e-07, + "loss": 0.1741, + "step": 3232 + }, + { + "epoch": 0.26, + "learning_rate": 7.851889858046861e-07, + "loss": 0.1999, + "step": 3264 + }, + { + "epoch": 0.26, + "learning_rate": 7.824525397639815e-07, + "loss": 0.2393, + "step": 3296 + }, + { + "epoch": 0.27, + "learning_rate": 7.797160937232768e-07, + "loss": 0.2242, + "step": 3328 + }, + { + "epoch": 0.27, + "learning_rate": 7.769796476825722e-07, + "loss": 0.1877, + "step": 3360 + }, + { + "epoch": 0.27, + "learning_rate": 7.742432016418676e-07, + "loss": 0.194, + "step": 3392 + }, + { + "epoch": 0.28, + "learning_rate": 7.71506755601163e-07, + "loss": 0.2499, + "step": 3424 + }, + { + "epoch": 0.28, + "learning_rate": 7.687703095604583e-07, + "loss": 0.2496, + "step": 3456 + }, + { + "epoch": 0.28, + "learning_rate": 7.660338635197537e-07, + "loss": 0.1899, + "step": 3488 + }, + { + "epoch": 0.28, + "learning_rate": 7.63297417479049e-07, + "loss": 0.1866, + "step": 3520 + }, + { + "epoch": 0.29, + "learning_rate": 7.605609714383444e-07, + "loss": 0.1843, + "step": 3552 + }, + { + "epoch": 0.29, + "learning_rate": 7.578245253976397e-07, + "loss": 0.1991, + "step": 3584 + }, + { + "epoch": 0.29, + "learning_rate": 7.550880793569352e-07, + "loss": 0.2122, + "step": 3616 + }, + { + "epoch": 0.29, + "learning_rate": 7.523516333162305e-07, + "loss": 0.2423, + "step": 3648 + }, + { + "epoch": 0.3, + "learning_rate": 7.496151872755259e-07, + "loss": 0.2568, + "step": 3680 + }, + { + "epoch": 0.3, + "learning_rate": 7.468787412348212e-07, + "loss": 0.2727, + "step": 3712 + }, + { + "epoch": 0.3, + "learning_rate": 7.441422951941167e-07, + "loss": 0.1825, + "step": 3744 + }, + { + "epoch": 0.3, + "learning_rate": 7.41405849153412e-07, + "loss": 0.1573, + "step": 3776 + }, + { + "epoch": 0.31, + "learning_rate": 7.386694031127074e-07, + "loss": 0.2034, + "step": 3808 + }, + { + "epoch": 0.31, + "learning_rate": 7.359329570720028e-07, + "loss": 0.1514, + "step": 3840 + }, + { + "epoch": 0.31, + "learning_rate": 7.33196511031298e-07, + "loss": 0.2618, + "step": 3872 + }, + { + "epoch": 0.31, + "learning_rate": 7.304600649905934e-07, + "loss": 0.244, + "step": 3904 + }, + { + "epoch": 0.32, + "learning_rate": 7.277236189498887e-07, + "loss": 0.1753, + "step": 3936 + }, + { + "epoch": 0.32, + "learning_rate": 7.249871729091842e-07, + "loss": 0.2044, + "step": 3968 + }, + { + "epoch": 0.32, + "learning_rate": 7.222507268684795e-07, + "loss": 0.1882, + "step": 4000 + }, + { + "epoch": 0.32, + "learning_rate": 7.195142808277749e-07, + "loss": 0.2397, + "step": 4032 + }, + { + "epoch": 0.33, + "learning_rate": 7.167778347870702e-07, + "loss": 0.2084, + "step": 4064 + }, + { + "epoch": 0.33, + "learning_rate": 7.140413887463657e-07, + "loss": 0.2635, + "step": 4096 + }, + { + "epoch": 0.33, + "learning_rate": 7.11304942705661e-07, + "loss": 0.2512, + "step": 4128 + }, + { + "epoch": 0.33, + "learning_rate": 7.085684966649563e-07, + "loss": 0.2411, + "step": 4160 + }, + { + "epoch": 0.34, + "learning_rate": 7.058320506242517e-07, + "loss": 0.1846, + "step": 4192 + }, + { + "epoch": 0.34, + "learning_rate": 7.030956045835471e-07, + "loss": 0.1447, + "step": 4224 + }, + { + "epoch": 0.34, + "learning_rate": 7.003591585428424e-07, + "loss": 0.2373, + "step": 4256 + }, + { + "epoch": 0.34, + "learning_rate": 6.976227125021378e-07, + "loss": 0.2097, + "step": 4288 + }, + { + "epoch": 0.35, + "learning_rate": 6.948862664614332e-07, + "loss": 0.2756, + "step": 4320 + }, + { + "epoch": 0.35, + "learning_rate": 6.922353343595006e-07, + "loss": 0.191, + "step": 4352 + }, + { + "epoch": 0.35, + "learning_rate": 6.894988883187959e-07, + "loss": 0.2076, + "step": 4384 + }, + { + "epoch": 0.35, + "learning_rate": 6.867624422780914e-07, + "loss": 0.2787, + "step": 4416 + }, + { + "epoch": 0.36, + "learning_rate": 6.840259962373867e-07, + "loss": 0.1894, + "step": 4448 + }, + { + "epoch": 0.36, + "learning_rate": 6.81289550196682e-07, + "loss": 0.1423, + "step": 4480 + }, + { + "epoch": 0.36, + "learning_rate": 6.785531041559774e-07, + "loss": 0.1738, + "step": 4512 + }, + { + "epoch": 0.37, + "learning_rate": 6.758166581152727e-07, + "loss": 0.2598, + "step": 4544 + }, + { + "epoch": 0.37, + "learning_rate": 6.730802120745681e-07, + "loss": 0.2753, + "step": 4576 + }, + { + "epoch": 0.37, + "learning_rate": 6.703437660338634e-07, + "loss": 0.2922, + "step": 4608 + }, + { + "epoch": 0.37, + "learning_rate": 6.676073199931589e-07, + "loss": 0.172, + "step": 4640 + }, + { + "epoch": 0.38, + "learning_rate": 6.648708739524542e-07, + "loss": 0.2269, + "step": 4672 + }, + { + "epoch": 0.38, + "learning_rate": 6.621344279117496e-07, + "loss": 0.2662, + "step": 4704 + }, + { + "epoch": 0.38, + "learning_rate": 6.59397981871045e-07, + "loss": 0.2674, + "step": 4736 + }, + { + "epoch": 0.38, + "learning_rate": 6.566615358303404e-07, + "loss": 0.2803, + "step": 4768 + }, + { + "epoch": 0.39, + "learning_rate": 6.539250897896357e-07, + "loss": 0.2253, + "step": 4800 + }, + { + "epoch": 0.39, + "learning_rate": 6.51188643748931e-07, + "loss": 0.2816, + "step": 4832 + }, + { + "epoch": 0.39, + "learning_rate": 6.484521977082264e-07, + "loss": 0.1596, + "step": 4864 + }, + { + "epoch": 0.39, + "learning_rate": 6.457157516675218e-07, + "loss": 0.2419, + "step": 4896 + }, + { + "epoch": 0.4, + "learning_rate": 6.429793056268171e-07, + "loss": 0.2344, + "step": 4928 + }, + { + "epoch": 0.4, + "learning_rate": 6.402428595861125e-07, + "loss": 0.2008, + "step": 4960 + }, + { + "epoch": 0.4, + "learning_rate": 6.375064135454079e-07, + "loss": 0.2291, + "step": 4992 + }, + { + "epoch": 0.4, + "learning_rate": 6.347699675047033e-07, + "loss": 0.1661, + "step": 5024 + }, + { + "epoch": 0.41, + "learning_rate": 6.320335214639986e-07, + "loss": 0.2022, + "step": 5056 + }, + { + "epoch": 0.41, + "learning_rate": 6.292970754232941e-07, + "loss": 0.2047, + "step": 5088 + }, + { + "epoch": 0.41, + "learning_rate": 6.265606293825894e-07, + "loss": 0.2004, + "step": 5120 + }, + { + "epoch": 0.41, + "learning_rate": 6.238241833418846e-07, + "loss": 0.1987, + "step": 5152 + }, + { + "epoch": 0.42, + "learning_rate": 6.2108773730118e-07, + "loss": 0.2315, + "step": 5184 + }, + { + "epoch": 0.42, + "learning_rate": 6.183512912604754e-07, + "loss": 0.2086, + "step": 5216 + }, + { + "epoch": 0.42, + "learning_rate": 6.156148452197708e-07, + "loss": 0.1717, + "step": 5248 + }, + { + "epoch": 0.42, + "learning_rate": 6.128783991790661e-07, + "loss": 0.2717, + "step": 5280 + }, + { + "epoch": 0.43, + "learning_rate": 6.101419531383615e-07, + "loss": 0.2229, + "step": 5312 + }, + { + "epoch": 0.43, + "learning_rate": 6.074055070976569e-07, + "loss": 0.1996, + "step": 5344 + }, + { + "epoch": 0.43, + "learning_rate": 6.046690610569523e-07, + "loss": 0.2549, + "step": 5376 + }, + { + "epoch": 0.43, + "learning_rate": 6.019326150162476e-07, + "loss": 0.2378, + "step": 5408 + }, + { + "epoch": 0.44, + "learning_rate": 5.991961689755431e-07, + "loss": 0.2488, + "step": 5440 + }, + { + "epoch": 0.44, + "learning_rate": 5.964597229348383e-07, + "loss": 0.2459, + "step": 5472 + }, + { + "epoch": 0.44, + "learning_rate": 5.937232768941337e-07, + "loss": 0.1928, + "step": 5504 + }, + { + "epoch": 0.45, + "learning_rate": 5.90986830853429e-07, + "loss": 0.2617, + "step": 5536 + }, + { + "epoch": 0.45, + "learning_rate": 5.882503848127245e-07, + "loss": 0.1927, + "step": 5568 + }, + { + "epoch": 0.45, + "learning_rate": 5.855139387720198e-07, + "loss": 0.1943, + "step": 5600 + }, + { + "epoch": 0.45, + "learning_rate": 5.827774927313152e-07, + "loss": 0.241, + "step": 5632 + }, + { + "epoch": 0.46, + "learning_rate": 5.800410466906105e-07, + "loss": 0.2354, + "step": 5664 + }, + { + "epoch": 0.46, + "learning_rate": 5.77304600649906e-07, + "loss": 0.2227, + "step": 5696 + }, + { + "epoch": 0.46, + "learning_rate": 5.745681546092013e-07, + "loss": 0.2395, + "step": 5728 + }, + { + "epoch": 0.46, + "learning_rate": 5.718317085684967e-07, + "loss": 0.1895, + "step": 5760 + }, + { + "epoch": 0.47, + "learning_rate": 5.690952625277921e-07, + "loss": 0.1621, + "step": 5792 + }, + { + "epoch": 0.47, + "learning_rate": 5.663588164870873e-07, + "loss": 0.2169, + "step": 5824 + }, + { + "epoch": 0.47, + "learning_rate": 5.636223704463827e-07, + "loss": 0.1979, + "step": 5856 + }, + { + "epoch": 0.47, + "learning_rate": 5.60885924405678e-07, + "loss": 0.206, + "step": 5888 + }, + { + "epoch": 0.48, + "learning_rate": 5.581494783649735e-07, + "loss": 0.1625, + "step": 5920 + }, + { + "epoch": 0.48, + "learning_rate": 5.554130323242688e-07, + "loss": 0.2625, + "step": 5952 + }, + { + "epoch": 0.48, + "learning_rate": 5.526765862835642e-07, + "loss": 0.1797, + "step": 5984 + }, + { + "epoch": 0.48, + "learning_rate": 5.499401402428595e-07, + "loss": 0.2056, + "step": 6016 + }, + { + "epoch": 0.49, + "learning_rate": 5.47203694202155e-07, + "loss": 0.1659, + "step": 6048 + }, + { + "epoch": 0.49, + "learning_rate": 5.444672481614503e-07, + "loss": 0.1524, + "step": 6080 + }, + { + "epoch": 0.49, + "learning_rate": 5.417308021207456e-07, + "loss": 0.236, + "step": 6112 + }, + { + "epoch": 0.49, + "learning_rate": 5.38994356080041e-07, + "loss": 0.1921, + "step": 6144 + }, + { + "epoch": 0.5, + "learning_rate": 5.362579100393364e-07, + "loss": 0.217, + "step": 6176 + }, + { + "epoch": 0.5, + "learning_rate": 5.335214639986317e-07, + "loss": 0.1996, + "step": 6208 + }, + { + "epoch": 0.5, + "learning_rate": 5.307850179579271e-07, + "loss": 0.1467, + "step": 6240 + }, + { + "epoch": 0.5, + "learning_rate": 5.280485719172225e-07, + "loss": 0.2702, + "step": 6272 + }, + { + "epoch": 0.51, + "learning_rate": 5.253121258765179e-07, + "loss": 0.1736, + "step": 6304 + }, + { + "epoch": 0.51, + "learning_rate": 5.225756798358132e-07, + "loss": 0.2281, + "step": 6336 + }, + { + "epoch": 0.51, + "learning_rate": 5.198392337951087e-07, + "loss": 0.2114, + "step": 6368 + }, + { + "epoch": 0.51, + "learning_rate": 5.17102787754404e-07, + "loss": 0.2252, + "step": 6400 + }, + { + "epoch": 0.52, + "learning_rate": 5.143663417136994e-07, + "loss": 0.163, + "step": 6432 + }, + { + "epoch": 0.52, + "learning_rate": 5.116298956729946e-07, + "loss": 0.266, + "step": 6464 + }, + { + "epoch": 0.52, + "learning_rate": 5.0889344963229e-07, + "loss": 0.2154, + "step": 6496 + }, + { + "epoch": 0.52, + "learning_rate": 5.061570035915854e-07, + "loss": 0.2003, + "step": 6528 + }, + { + "epoch": 0.53, + "learning_rate": 5.034205575508807e-07, + "loss": 0.1969, + "step": 6560 + }, + { + "epoch": 0.53, + "learning_rate": 5.006841115101761e-07, + "loss": 0.2436, + "step": 6592 + }, + { + "epoch": 0.53, + "learning_rate": 4.979476654694714e-07, + "loss": 0.134, + "step": 6624 + }, + { + "epoch": 0.54, + "learning_rate": 4.952112194287669e-07, + "loss": 0.2246, + "step": 6656 + }, + { + "epoch": 0.54, + "learning_rate": 4.924747733880622e-07, + "loss": 0.1873, + "step": 6688 + }, + { + "epoch": 0.54, + "learning_rate": 4.897383273473576e-07, + "loss": 0.1549, + "step": 6720 + }, + { + "epoch": 0.54, + "learning_rate": 4.87001881306653e-07, + "loss": 0.2518, + "step": 6752 + }, + { + "epoch": 0.55, + "learning_rate": 4.842654352659483e-07, + "loss": 0.2676, + "step": 6784 + }, + { + "epoch": 0.55, + "learning_rate": 4.815289892252437e-07, + "loss": 0.1537, + "step": 6816 + }, + { + "epoch": 0.55, + "learning_rate": 4.787925431845391e-07, + "loss": 0.1925, + "step": 6848 + }, + { + "epoch": 0.55, + "learning_rate": 4.760560971438344e-07, + "loss": 0.2157, + "step": 6880 + }, + { + "epoch": 0.56, + "learning_rate": 4.7331965110312977e-07, + "loss": 0.1923, + "step": 6912 + }, + { + "epoch": 0.56, + "learning_rate": 4.7058320506242517e-07, + "loss": 0.1922, + "step": 6944 + }, + { + "epoch": 0.56, + "learning_rate": 4.678467590217205e-07, + "loss": 0.257, + "step": 6976 + }, + { + "epoch": 0.56, + "learning_rate": 4.6511031298101586e-07, + "loss": 0.1744, + "step": 7008 + }, + { + "epoch": 0.57, + "learning_rate": 4.6237386694031126e-07, + "loss": 0.2281, + "step": 7040 + }, + { + "epoch": 0.57, + "learning_rate": 4.596374208996066e-07, + "loss": 0.2135, + "step": 7072 + }, + { + "epoch": 0.57, + "learning_rate": 4.56900974858902e-07, + "loss": 0.1841, + "step": 7104 + }, + { + "epoch": 0.57, + "learning_rate": 4.5416452881819735e-07, + "loss": 0.2652, + "step": 7136 + }, + { + "epoch": 0.58, + "learning_rate": 4.5142808277749275e-07, + "loss": 0.225, + "step": 7168 + }, + { + "epoch": 0.58, + "learning_rate": 4.4869163673678804e-07, + "loss": 0.2377, + "step": 7200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4595519069608344e-07, + "loss": 0.2323, + "step": 7232 + }, + { + "epoch": 0.58, + "learning_rate": 4.432187446553788e-07, + "loss": 0.2227, + "step": 7264 + }, + { + "epoch": 0.59, + "learning_rate": 4.404822986146742e-07, + "loss": 0.2968, + "step": 7296 + }, + { + "epoch": 0.59, + "learning_rate": 4.377458525739695e-07, + "loss": 0.2623, + "step": 7328 + }, + { + "epoch": 0.59, + "learning_rate": 4.350094065332649e-07, + "loss": 0.1943, + "step": 7360 + }, + { + "epoch": 0.59, + "learning_rate": 4.322729604925603e-07, + "loss": 0.158, + "step": 7392 + }, + { + "epoch": 0.6, + "learning_rate": 4.2953651445185563e-07, + "loss": 0.2413, + "step": 7424 + }, + { + "epoch": 0.6, + "learning_rate": 4.2680006841115103e-07, + "loss": 0.2413, + "step": 7456 + }, + { + "epoch": 0.6, + "learning_rate": 4.240636223704464e-07, + "loss": 0.1793, + "step": 7488 + }, + { + "epoch": 0.6, + "learning_rate": 4.213271763297417e-07, + "loss": 0.2505, + "step": 7520 + }, + { + "epoch": 0.61, + "learning_rate": 4.1859073028903707e-07, + "loss": 0.2078, + "step": 7552 + }, + { + "epoch": 0.61, + "learning_rate": 4.1585428424833247e-07, + "loss": 0.2603, + "step": 7584 + }, + { + "epoch": 0.61, + "learning_rate": 4.131178382076278e-07, + "loss": 0.1426, + "step": 7616 + }, + { + "epoch": 0.61, + "learning_rate": 4.103813921669232e-07, + "loss": 0.1823, + "step": 7648 + }, + { + "epoch": 0.62, + "learning_rate": 4.0764494612621856e-07, + "loss": 0.2215, + "step": 7680 + }, + { + "epoch": 0.62, + "learning_rate": 4.049085000855139e-07, + "loss": 0.2956, + "step": 7712 + }, + { + "epoch": 0.62, + "learning_rate": 4.021720540448093e-07, + "loss": 0.1766, + "step": 7744 + }, + { + "epoch": 0.63, + "learning_rate": 3.9943560800410465e-07, + "loss": 0.2124, + "step": 7776 + }, + { + "epoch": 0.63, + "learning_rate": 3.9669916196340005e-07, + "loss": 0.2123, + "step": 7808 + }, + { + "epoch": 0.63, + "learning_rate": 3.9396271592269535e-07, + "loss": 0.2172, + "step": 7840 + }, + { + "epoch": 0.63, + "learning_rate": 3.9122626988199075e-07, + "loss": 0.1737, + "step": 7872 + }, + { + "epoch": 0.64, + "learning_rate": 3.884898238412861e-07, + "loss": 0.2504, + "step": 7904 + }, + { + "epoch": 0.64, + "learning_rate": 3.857533778005815e-07, + "loss": 0.2242, + "step": 7936 + }, + { + "epoch": 0.64, + "learning_rate": 3.8301693175987684e-07, + "loss": 0.205, + "step": 7968 + }, + { + "epoch": 0.64, + "learning_rate": 3.802804857191722e-07, + "loss": 0.2286, + "step": 8000 + }, + { + "epoch": 0.65, + "learning_rate": 3.775440396784676e-07, + "loss": 0.1848, + "step": 8032 + }, + { + "epoch": 0.65, + "learning_rate": 3.7480759363776293e-07, + "loss": 0.2065, + "step": 8064 + }, + { + "epoch": 0.65, + "learning_rate": 3.7207114759705833e-07, + "loss": 0.1688, + "step": 8096 + }, + { + "epoch": 0.65, + "learning_rate": 3.693347015563537e-07, + "loss": 0.2489, + "step": 8128 + }, + { + "epoch": 0.66, + "learning_rate": 3.66598255515649e-07, + "loss": 0.2309, + "step": 8160 + }, + { + "epoch": 0.66, + "learning_rate": 3.6386180947494437e-07, + "loss": 0.1236, + "step": 8192 + }, + { + "epoch": 0.66, + "learning_rate": 3.6112536343423977e-07, + "loss": 0.2242, + "step": 8224 + }, + { + "epoch": 0.66, + "learning_rate": 3.583889173935351e-07, + "loss": 0.221, + "step": 8256 + }, + { + "epoch": 0.67, + "learning_rate": 3.556524713528305e-07, + "loss": 0.1663, + "step": 8288 + }, + { + "epoch": 0.67, + "learning_rate": 3.5291602531212586e-07, + "loss": 0.2063, + "step": 8320 + }, + { + "epoch": 0.67, + "learning_rate": 3.501795792714212e-07, + "loss": 0.2651, + "step": 8352 + }, + { + "epoch": 0.67, + "learning_rate": 3.474431332307166e-07, + "loss": 0.2451, + "step": 8384 + }, + { + "epoch": 0.68, + "learning_rate": 3.4470668719001196e-07, + "loss": 0.1645, + "step": 8416 + }, + { + "epoch": 0.68, + "learning_rate": 3.4197024114930736e-07, + "loss": 0.1446, + "step": 8448 + }, + { + "epoch": 0.68, + "learning_rate": 3.3923379510860265e-07, + "loss": 0.2006, + "step": 8480 + }, + { + "epoch": 0.68, + "learning_rate": 3.3649734906789805e-07, + "loss": 0.1702, + "step": 8512 + }, + { + "epoch": 0.69, + "learning_rate": 3.337609030271934e-07, + "loss": 0.2685, + "step": 8544 + }, + { + "epoch": 0.69, + "learning_rate": 3.310244569864888e-07, + "loss": 0.1944, + "step": 8576 + }, + { + "epoch": 0.69, + "learning_rate": 3.2828801094578414e-07, + "loss": 0.1461, + "step": 8608 + }, + { + "epoch": 0.69, + "learning_rate": 3.255515649050795e-07, + "loss": 0.2539, + "step": 8640 + }, + { + "epoch": 0.7, + "learning_rate": 3.228151188643749e-07, + "loss": 0.1783, + "step": 8672 + }, + { + "epoch": 0.7, + "learning_rate": 3.2007867282367024e-07, + "loss": 0.2084, + "step": 8704 + }, + { + "epoch": 0.7, + "learning_rate": 3.1734222678296563e-07, + "loss": 0.1596, + "step": 8736 + }, + { + "epoch": 0.7, + "learning_rate": 3.14605780742261e-07, + "loss": 0.1677, + "step": 8768 + }, + { + "epoch": 0.71, + "learning_rate": 3.1186933470155633e-07, + "loss": 0.1636, + "step": 8800 + }, + { + "epoch": 0.71, + "learning_rate": 3.091328886608517e-07, + "loss": 0.2336, + "step": 8832 + }, + { + "epoch": 0.71, + "learning_rate": 3.063964426201471e-07, + "loss": 0.1853, + "step": 8864 + }, + { + "epoch": 0.72, + "learning_rate": 3.036599965794424e-07, + "loss": 0.2304, + "step": 8896 + }, + { + "epoch": 0.72, + "learning_rate": 3.009235505387378e-07, + "loss": 0.2037, + "step": 8928 + }, + { + "epoch": 0.72, + "learning_rate": 2.9818710449803317e-07, + "loss": 0.2159, + "step": 8960 + }, + { + "epoch": 0.72, + "learning_rate": 2.954506584573285e-07, + "loss": 0.2512, + "step": 8992 + }, + { + "epoch": 0.73, + "learning_rate": 2.927142124166239e-07, + "loss": 0.2011, + "step": 9024 + }, + { + "epoch": 0.73, + "learning_rate": 2.8997776637591926e-07, + "loss": 0.1677, + "step": 9056 + }, + { + "epoch": 0.73, + "learning_rate": 2.8724132033521466e-07, + "loss": 0.192, + "step": 9088 + }, + { + "epoch": 0.73, + "learning_rate": 2.8450487429450995e-07, + "loss": 0.1615, + "step": 9120 + }, + { + "epoch": 0.74, + "learning_rate": 2.8176842825380535e-07, + "loss": 0.1646, + "step": 9152 + }, + { + "epoch": 0.74, + "learning_rate": 2.790319822131007e-07, + "loss": 0.2301, + "step": 9184 + }, + { + "epoch": 0.74, + "learning_rate": 2.762955361723961e-07, + "loss": 0.1663, + "step": 9216 + }, + { + "epoch": 0.74, + "learning_rate": 2.7355909013169145e-07, + "loss": 0.251, + "step": 9248 + }, + { + "epoch": 0.75, + "learning_rate": 2.708226440909868e-07, + "loss": 0.2301, + "step": 9280 + }, + { + "epoch": 0.75, + "learning_rate": 2.680861980502822e-07, + "loss": 0.2222, + "step": 9312 + }, + { + "epoch": 0.75, + "learning_rate": 2.6534975200957754e-07, + "loss": 0.1784, + "step": 9344 + }, + { + "epoch": 0.75, + "learning_rate": 2.6261330596887294e-07, + "loss": 0.1714, + "step": 9376 + }, + { + "epoch": 0.76, + "learning_rate": 2.598768599281683e-07, + "loss": 0.2258, + "step": 9408 + }, + { + "epoch": 0.76, + "learning_rate": 2.571404138874637e-07, + "loss": 0.1907, + "step": 9440 + }, + { + "epoch": 0.76, + "learning_rate": 2.54403967846759e-07, + "loss": 0.1938, + "step": 9472 + }, + { + "epoch": 0.76, + "learning_rate": 2.516675218060544e-07, + "loss": 0.1831, + "step": 9504 + }, + { + "epoch": 0.77, + "learning_rate": 2.489310757653497e-07, + "loss": 0.1833, + "step": 9536 + }, + { + "epoch": 0.77, + "learning_rate": 2.461946297246451e-07, + "loss": 0.2551, + "step": 9568 + }, + { + "epoch": 0.77, + "learning_rate": 2.4345818368394047e-07, + "loss": 0.1553, + "step": 9600 + }, + { + "epoch": 0.77, + "learning_rate": 2.407217376432358e-07, + "loss": 0.1785, + "step": 9632 + }, + { + "epoch": 0.78, + "learning_rate": 2.379852916025312e-07, + "loss": 0.2903, + "step": 9664 + }, + { + "epoch": 0.78, + "learning_rate": 2.3524884556182656e-07, + "loss": 0.2093, + "step": 9696 + }, + { + "epoch": 0.78, + "learning_rate": 2.325123995211219e-07, + "loss": 0.2865, + "step": 9728 + }, + { + "epoch": 0.78, + "learning_rate": 2.2977595348041728e-07, + "loss": 0.2033, + "step": 9760 + }, + { + "epoch": 0.79, + "learning_rate": 2.2703950743971268e-07, + "loss": 0.1929, + "step": 9792 + }, + { + "epoch": 0.79, + "learning_rate": 2.2430306139900803e-07, + "loss": 0.1968, + "step": 9824 + }, + { + "epoch": 0.79, + "learning_rate": 2.215666153583034e-07, + "loss": 0.1985, + "step": 9856 + }, + { + "epoch": 0.79, + "learning_rate": 2.1883016931759877e-07, + "loss": 0.1509, + "step": 9888 + }, + { + "epoch": 0.8, + "learning_rate": 2.1609372327689412e-07, + "loss": 0.1988, + "step": 9920 + } + ], + "logging_steps": 32, + "max_steps": 12440, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1243, + "total_flos": 4.22256252616704e+19, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-9944/training_args.bin b/checkpoint-9944/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/checkpoint-9944/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec8ed74527c8219d428d1f813aa49a362ce97275 --- /dev/null +++ b/config.json @@ -0,0 +1,66 @@ +{ + "_name_or_path": "openai/whisper-large-v2", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": false, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + 50259 + ], + [ + 2, + 50359 + ], + [ + 3, + 50363 + ] + ], + "init_std": 0.02, + "is_encoder_decoder": true, + "language": "English", + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 80, + "pad_token_id": 50257, + "scale_embedding": false, + "suppress_tokens": [], + "torch_dtype": "float32", + "transformers_version": "4.37.2", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51865 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e61056ed26485ce5377ccbfb5978175d6527a2d --- /dev/null +++ b/generation_config.json @@ -0,0 +1,317 @@ +{ + "alignment_heads": [ + [ + 10, + 12 + ], + [ + 13, + 17 + ], + [ + 16, + 11 + ], + [ + 16, + 12 + ], + [ + 16, + 13 + ], + [ + 17, + 15 + ], + [ + 17, + 16 + ], + [ + 18, + 4 + ], + [ + 18, + 11 + ], + [ + 18, + 19 + ], + [ + 19, + 11 + ], + [ + 21, + 2 + ], + [ + 21, + 3 + ], + [ + 22, + 3 + ], + [ + 22, + 9 + ], + [ + 22, + 12 + ], + [ + 23, + 5 + ], + [ + 23, + 7 + ], + [ + 23, + 13 + ], + [ + 25, + 5 + ], + [ + 26, + 1 + ], + [ + 26, + 12 + ], + [ + 27, + 15 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50363, + "pad_token_id": 50257, + "prev_sot_token_id": 50361, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50358, + 50359, + 50360, + 50361, + 50362 + ], + "task": "transcribe", + "task_to_id": { + "transcribe": 50359, + "translate": 50358 + }, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3c0f1551b842aa48fa979bff000e7b87a122786 --- /dev/null +++ b/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf84125391ae5e4fdb39b7889773fdd74bc0199cfeba01327de5a2b08db83040 +size 4992706480 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b6d9657840798d3e1062376e0ec3ae174c58d29 --- /dev/null +++ b/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4351589d90d696da8c71bf0688d7cf662f8a9ee6e030a3e346f73025278d2988 +size 1180663192 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..24151282ff868725b117fb8cfd96642d85e4d28a --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173219840 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..91876762a536a746d268353c5cba57286e76b058 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 80, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..de74d9ac505e2b78c1eaae5bd2d033ced817e52a --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2343c53f43ecba9eda3fbab0cfda8b6794aaa661c23626f520c968ab283189ab +size 5048