Upload tokenizer files

Browse files

Files changed (3) hide show

special_tokens_map.json +6 -0
tokenizer.json +181 -0
tokenizer_config.json +43 -0

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|startoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|pad|>",
+  "unk_token": "<|unknown|>"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,181 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "<|pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "<|startoftext|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "<|endoftext|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "<|unknown|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Sequence",
+    "pretokenizers": [
+      {
+        "type": "Whitespace"
+      },
+      {
+        "type": "Split",
+        "pattern": {
+          "Regex": "\\d|[QBRN]"
+        },
+        "behavior": "MergedWithPrevious",
+        "invert": false
+      }
+    ]
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<|startoftext|>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<|startoftext|>": {
+        "id": "<|startoftext|>",
+        "ids": [
+          1
+        ],
+        "tokens": [
+          "<|startoftext|>"
+        ]
+      }
+    }
+  },
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "<|pad|>": 0,
+      "<|startoftext|>": 1,
+      "<|endoftext|>": 2,
+      "<|unknown|>": 3,
+      "a1": 4,
+      "b1": 5,
+      "c1": 6,
+      "d1": 7,
+      "e1": 8,
+      "f1": 9,
+      "g1": 10,
+      "h1": 11,
+      "a2": 12,
+      "b2": 13,
+      "c2": 14,
+      "d2": 15,
+      "e2": 16,
+      "f2": 17,
+      "g2": 18,
+      "h2": 19,
+      "a3": 20,
+      "b3": 21,
+      "c3": 22,
+      "d3": 23,
+      "e3": 24,
+      "f3": 25,
+      "g3": 26,
+      "h3": 27,
+      "a4": 28,
+      "b4": 29,
+      "c4": 30,
+      "d4": 31,
+      "e4": 32,
+      "f4": 33,
+      "g4": 34,
+      "h4": 35,
+      "a5": 36,
+      "b5": 37,
+      "c5": 38,
+      "d5": 39,
+      "e5": 40,
+      "f5": 41,
+      "g5": 42,
+      "h5": 43,
+      "a6": 44,
+      "b6": 45,
+      "c6": 46,
+      "d6": 47,
+      "e6": 48,
+      "f6": 49,
+      "g6": 50,
+      "h6": 51,
+      "a7": 52,
+      "b7": 53,
+      "c7": 54,
+      "d7": 55,
+      "e7": 56,
+      "f7": 57,
+      "g7": 58,
+      "h7": 59,
+      "a8": 60,
+      "b8": 61,
+      "c8": 62,
+      "d8": 63,
+      "e8": 64,
+      "f8": 65,
+      "g8": 66,
+      "h8": 67,
+      "Q": 68,
+      "R": 69,
+      "B": 70,
+      "N": 71
+    },
+    "unk_token": "<|unknown|>"
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<|unknown|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "pad_token": "<|pad|>",
+  "tokenizer_class": "UciTileTokenizer",
+  "unk_token": "<|unknown|>"
+}