adad
Browse files- tokenizeConfig.py +11 -0
tokenizeConfig.py
CHANGED
@@ -21,6 +21,17 @@ class OBITokenizer(PreTrainedTokenizer):
|
|
21 |
self.tokenizer.decoder = decoders.ByteLevel()
|
22 |
super().__init__(**kwargs)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
def _pad(
|
26 |
self,
|
|
|
21 |
self.tokenizer.decoder = decoders.ByteLevel()
|
22 |
super().__init__(**kwargs)
|
23 |
|
24 |
+
# Set the padding token
|
25 |
+
self.pad_token = "[PAD]"
|
26 |
+
# Set the special tokens
|
27 |
+
self.cls_token = "[CLS]"
|
28 |
+
self.sep_token = "[SEP]"
|
29 |
+
self.unk_token = "[UNK]"
|
30 |
+
self.mask_token = "[MASK]"
|
31 |
+
self.bos_token = "[CLS]"
|
32 |
+
self.eos_token = "[SEP]"
|
33 |
+
self.pad_token = "[PAD]"
|
34 |
+
|
35 |
|
36 |
def _pad(
|
37 |
self,
|