aframson commited on
Commit
f7b41c1
·
1 Parent(s): b5f3842
Files changed (1)
  1. tokenizeConfig.py +5 -1
tokenizeConfig.py CHANGED
@@ -3,7 +3,10 @@ from tokenizers import Tokenizer, models, pre_tokenizers, trainers, decoders
3
  import json
4
 
5
  class OBITokenizer(PreTrainedTokenizer):
6
- def __init__(self):
 
 
 
7
  # Initialize a BPE model for tokenization
8
  bpe_model = models.BPE()
9
  # Initialize the tokenizer
@@ -11,6 +14,7 @@ class OBITokenizer(PreTrainedTokenizer):
11
  # Add pre-tokenization and decoding steps if needed
12
  self.tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel()
13
  self.tokenizer.decoder = decoders.ByteLevel()
 
14
 
15
  def train(self, files,save_path):
16
  # Training: Fit the tokenizer on your text data
 
3
  import json
4
 
5
  class OBITokenizer(PreTrainedTokenizer):
6
+ def __init__(self, auto_map={"AutoTokenizer": ["tokenizeConfig.OBITokenizer"]},tokenizer_class= "OBITokenizer",**kwargs):
7
+ # Initialize your tokenizer with the auto_map parameter if needed
8
+ self.auto_map=auto_map
9
+ self.tokenizer_class=tokenizer_class
10
  # Initialize a BPE model for tokenization
11
  bpe_model = models.BPE()
12
  # Initialize the tokenizer
 
14
  # Add pre-tokenization and decoding steps if needed
15
  self.tokenizer.pre_tokenizer = pre_tokenizers.ByteLevel()
16
  self.tokenizer.decoder = decoders.ByteLevel()
17
+ super().__init__(**kwargs)
18
 
19
  def train(self, files,save_path):
20
  # Training: Fit the tokenizer on your text data