aframson committed on
Commit
4e47b59
·
1 Parent(s): ac72af3
Files changed (1) hide show
  1. tokenizeConfig.py +11 -0
tokenizeConfig.py CHANGED
@@ -21,6 +21,17 @@ class OBITokenizer(PreTrainedTokenizer):
21
  self.tokenizer.decoder = decoders.ByteLevel()
22
  super().__init__(**kwargs)
23
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def _pad(
26
  self,
 
21
  self.tokenizer.decoder = decoders.ByteLevel()
22
  super().__init__(**kwargs)
23
 
24
+ # Set the padding token
25
+ self.pad_token = "[PAD]"
26
+ # Set the special tokens
27
+ self.cls_token = "[CLS]"
28
+ self.sep_token = "[SEP]"
29
+ self.unk_token = "[UNK]"
30
+ self.mask_token = "[MASK]"
31
+ self.bos_token = "[CLS]"
32
+ self.eos_token = "[SEP]"
33
+ self.pad_token = "[PAD]"
34
+
35
 
36
  def _pad(
37
  self,