aframson committed on
Commit
dfe07c8
·
1 Parent(s): e5d8b42
Files changed (1) hide show
  1. tokenizeConfig.py +5 -0
tokenizeConfig.py CHANGED
@@ -53,6 +53,11 @@ class OBITokenizer(PreTrainedTokenizer):
53
  # Load the vocabulary file
54
  self.tokenizer.get_vocab().add_special_tokens([self.cls_token, self.sep_token, self.unk_token, self.mask_token])
55
 
 
 
 
 
 
56
  def _tokenize(self, text):
57
  # Implement your custom tokenization logic here
58
  # In this example, we split the text into tokens using whitespace
 
53
  # Load the vocabulary file
54
  self.tokenizer.get_vocab().add_special_tokens([self.cls_token, self.sep_token, self.unk_token, self.mask_token])
55
 
56
+ def add_special_tokens(self, special_tokens_dict):
57
+ # Implement the addition of special tokens to the vocabulary here
58
+ # Example: self.tokenizer.get_vocab().add_special_tokens(special_tokens_dict)
59
+ return self.tokenizer.get_vocab().add_special_tokens(special_tokens_dict)
60
+
61
  def _tokenize(self, text):
62
  # Implement your custom tokenization logic here
63
  # In this example, we split the text into tokens using whitespace