SM0rc committed on
Commit
a802e4a
·
verified ·
1 Parent(s): c4d9a35

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|eot_id|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<pad>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -2306,6 +2306,15 @@
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
 
 
 
 
 
 
 
 
 
2309
  }
2310
  ],
2311
  "normalizer": null,
 
2306
  "rstrip": false,
2307
  "normalized": false,
2308
  "special": true
2309
+ },
2310
+ {
2311
+ "id": 128256,
2312
+ "content": "<pad>",
2313
+ "single_word": false,
2314
+ "lstrip": false,
2315
+ "rstrip": false,
2316
+ "normalized": false,
2317
+ "special": true
2318
  }
2319
  ],
2320
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -2047,6 +2047,14 @@
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
@@ -2059,7 +2067,7 @@
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 1000000000000000019884624838656,
2062
- "pad_token": "<|eot_id|>",
2063
  "stride": 0,
2064
  "tokenizer_class": "PreTrainedTokenizerFast",
2065
  "truncation_side": "right",
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
2050
+ },
2051
+ "128256": {
2052
+ "content": "<pad>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
  }
2059
  },
2060
  "bos_token": "<|begin_of_text|>",
 
2067
  "attention_mask"
2068
  ],
2069
  "model_max_length": 1000000000000000019884624838656,
2070
+ "pad_token": "<pad>",
2071
  "stride": 0,
2072
  "tokenizer_class": "PreTrainedTokenizerFast",
2073
  "truncation_side": "right",