aframson committed on
Commit
e048a1f
·
1 Parent(s): 1fcedf8
Files changed (1) hide show
  1. tokenizer_config.json +12 -16
tokenizer_config.json CHANGED
@@ -1,17 +1,13 @@
1
-
2
- {
3
- "auto_map": {
4
- "AutoTokenizer": [
5
- "tokenizeConfig.OBITokenizer",
6
- null
7
- ]
8
- },
9
- "bos_token": "<s>",
10
- "clean_up_tokenization_spaces": false,
11
- "eos_token": "</s>",
12
- "model_max_length": 1000000000000000019884624838656,
13
- "pad_token": "</s>",
14
  "tokenizer_class": "OBITokenizer",
15
- "unk_token": "<unk>"
16
- }
17
-
 
 
 
 
 
 
1
+
2
+ {
3
+ "pat_str": "'s|'t|'re|'ve|'m|'ll|'d| ?[\\p{L}]+| ?[\\p{N}]+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+",
4
+
 
 
 
 
 
 
 
 
 
5
  "tokenizer_class": "OBITokenizer",
6
+ "auto_map": {
7
+ "AutoTokenizer": [
8
+ "tokenizeConfig.OBITokenizer",
9
+ null
10
+ ]
11
+ },
12
+ "vocab_size": 600
13
+ }