adjldh
Browse files- tokenizeConfig.py +5 -0
tokenizeConfig.py
CHANGED
@@ -53,6 +53,11 @@ class OBITokenizer(PreTrainedTokenizer):
|
|
53 |
# Load the vocabulary file
|
54 |
self.tokenizer.get_vocab().add_special_tokens([self.cls_token, self.sep_token, self.unk_token, self.mask_token])
|
55 |
|
|
|
|
|
|
|
|
|
|
|
56 |
def _tokenize(self, text):
|
57 |
# Implement your custom tokenization logic here
|
58 |
# In this example, we split the text into tokens using whitespace
|
|
|
53 |
# Load the vocabulary file
|
54 |
self.tokenizer.get_vocab().add_special_tokens([self.cls_token, self.sep_token, self.unk_token, self.mask_token])
|
55 |
|
56 |
+
def add_special_tokens(self, special_tokens_dict):
|
57 |
+
# Implement the addition of special tokens to the vocabulary here
|
58 |
+
# Example: self.tokenizer.get_vocab().add_special_tokens(special_tokens_dict)
|
59 |
+
return self.tokenizer.get_vocab().add_special_tokens(special_tokens_dict)
|
60 |
+
|
61 |
def _tokenize(self, text):
|
62 |
# Implement your custom tokenization logic here
|
63 |
# In this example, we split the text into tokens using whitespace
|