Update README.md
Browse files
README.md
CHANGED
@@ -31,18 +31,25 @@ pip install sentencepiece
```
31
32   ```python
33   import sentencepiece as spm
34
35   # Load the tokenizer model
36   sp = spm.SentencePieceProcessor()
37 - sp.load(
38
39   # Sample text for tokenization
40   text = "ଦୀପାବଳି ଏକ ଭାରତୀୟ ପର୍ବ ।"
41
42 - # Tokenize the text
43   tokens = sp.encode_as_pieces(text)
44   token_ids = sp.encode_as_ids(text)
45
46   print("Tokens:", tokens)
47   print("Token IDs:", token_ids)
48   ```
```
|
```
31
32   ```python
33   import sentencepiece as spm
34 + from huggingface_hub import hf_hub_download
35 +
36 + # Download the model file from Hugging Face
37 + model_path = hf_hub_download(repo_id="shantipriya/OdiaTokenizer", filename="odia_tokenizers_test.model")
38
39   # Load the tokenizer model
40   sp = spm.SentencePieceProcessor()
41 + sp.load(model_path)
42
43   # Sample text for tokenization
44   text = "ଦୀପାବଳି ଏକ ଭାରତୀୟ ପର୍ବ ।"
45
46 + # Tokenize the text into pieces (subwords or tokens)
47   tokens = sp.encode_as_pieces(text)
48 +
49 + # Tokenize the text into token IDs (integer representations of the tokens)
50   token_ids = sp.encode_as_ids(text)
51
52 + # Print the tokenized output
53   print("Tokens:", tokens)
54   print("Token IDs:", token_ids)
55   ```
```