Oriya
sentencepiece
shantipriya committed on
Commit
85d1ed9
·
verified ·
1 Parent(s): c779351

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -2
README.md CHANGED
@@ -31,18 +31,25 @@ pip install sentencepiece
31
 
32
  ```python
33
  import sentencepiece as spm
 
 
 
 
34
 
35
  # Load the tokenizer model
36
  sp = spm.SentencePieceProcessor()
37
- sp.load("path/to/odia_tokenizers_test.model")
38
 
39
  # Sample text for tokenization
40
  text = "ଦୀପାବଳି ଏକ ଭାରତୀୟ ପର୍ବ ।"
41
 
42
- # Tokenize the text
43
  tokens = sp.encode_as_pieces(text)
 
 
44
  token_ids = sp.encode_as_ids(text)
45
 
 
46
  print("Tokens:", tokens)
47
  print("Token IDs:", token_ids)
48
  ```
 
31
 
32
  ```python
33
  import sentencepiece as spm
34
+ from huggingface_hub import hf_hub_download
35
+
36
+ # Download the model file from Hugging Face
37
+ model_path = hf_hub_download(repo_id="shantipriya/OdiaTokenizer", filename="odia_tokenizers_test.model")
38
 
39
  # Load the tokenizer model
40
  sp = spm.SentencePieceProcessor()
41
+ sp.load(model_path)
42
 
43
  # Sample text for tokenization
44
  text = "ଦୀପାବଳି ଏକ ଭାରତୀୟ ପର୍ବ ।"
45
 
46
+ # Tokenize the text into pieces (subwords or tokens)
47
  tokens = sp.encode_as_pieces(text)
48
+
49
+ # Tokenize the text into token IDs (integer representations of the tokens)
50
  token_ids = sp.encode_as_ids(text)
51
 
52
+ # Print the tokenized output
53
  print("Tokens:", tokens)
54
  print("Token IDs:", token_ids)
55
  ```