Update README.md
README.md CHANGED
@@ -65,8 +65,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
                     --learning_rate 1e-3 --batch_size 64 \
                     --span_masking --span_geo_prob 0.3 --span_max_length 5 \
                     --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
-                    --encoder transformer --mask fully_visible --layernorm_positioning pre\
-                    --remove_transformer_bias --decoder transformer \
+                    --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
                     --target t5 --tie_weights
 
 ```
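Note on the span-masking flags above: `--span_masking --span_geo_prob 0.3 --span_max_length 5` configure SpanBERT-style span corruption, where masked-span lengths are drawn from a geometric distribution truncated at a maximum length. Below is a minimal illustrative sketch of the distribution these flags parameterize; it is an assumption about intent, not UER-py's actual implementation.

```python
import random

def sample_span_length(geo_prob: float = 0.3, max_length: int = 5) -> int:
    """Draw a masked-span length from a geometric distribution truncated
    at max_length, illustrating what --span_geo_prob and --span_max_length
    control. Sketch only; not UER-py's code."""
    length = 1
    while length < max_length and random.random() >= geo_prob:
        length += 1
    return length

# Example: with geo_prob=0.3, most sampled spans are 1-3 tokens long.
print([sample_span_length() for _ in range(10)])
```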
@@ -77,12 +76,12 @@ Stage2:
 python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
                       --vocab_path models/google_zh_with_sentinel_vocab.txt \
                       --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
-                      --
-                      --dynamic_masking
+                      --processes_num 32 --seq_length 512 \
+                      --dynamic_masking --target t5
 ```
 
 ```
-python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
+python3 pretrain.py --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
                     --pretrained_model_path models/cluecorpussmall_t5_seq128_model.bin-1000000 \
                     --vocab_path models/google_zh_with_sentinel_vocab.txt \
                     --config_path models/t5/small_config.json \
@@ -92,8 +91,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
                     --learning_rate 1e-3 --batch_size 16 \
                     --span_masking --span_geo_prob 0.3 --span_max_length 5 \
                     --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
-                    --encoder transformer --mask fully_visible --layernorm_positioning pre\
-                    --remove_transformer_bias --decoder transformer \
+                    --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
                     --target t5 --tie_weights
 ```
 
@@ -110,6 +108,14 @@ python3 scripts/convert_t5_from_uer_to_huggingface.py --input_model_path cluecor
 ### BibTeX entry and citation info
 
 ```
+@article{2020t5,
+  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
+  author  = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
+  journal = {Journal of Machine Learning Research},
+  pages   = {1-67},
+  year    = {2020}
+}
+
 @article{zhao2019uer,
   title={UER: An Open-Source Toolkit for Pre-training Models},
   author={Zhao, Zhe and Chen, Hui and Zhang, Jinbin and Zhao, Xin and Liu, Tao and Lu, Wei and Chen, Xi and Deng, Haotang and Ju, Qi and Du, Xiaoyong},
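The last hunk's context line shows the checkpoint being exported with `scripts/convert_t5_from_uer_to_huggingface.py`. Below is a minimal sketch of loading the converted weights with Hugging Face Transformers, assuming they were saved to a local directory named `cluecorpussmall_t5_small/`; the directory name and example input are hypothetical.

```python
from transformers import BertTokenizer, T5ForConditionalGeneration, Text2TextGenerationPipeline

# Load the converted checkpoint; the directory name is a placeholder.
tokenizer = BertTokenizer.from_pretrained("cluecorpussmall_t5_small/")
model = T5ForConditionalGeneration.from_pretrained("cluecorpussmall_t5_small/")

# The sentinel vocabulary (google_zh_with_sentinel_vocab.txt) marks masked
# spans with extra0, extra1, ... tokens, which the model is trained to fill.
text2text = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)

# Example input: "中国的首都是extra0京" ("The capital of China is extra0-jing").
print(text2text("中国的首都是extra0京", max_length=50))
```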