Update README.md
README.md CHANGED
@@ -65,8 +65,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
                     --learning_rate 1e-3 --batch_size 64 \
                     --span_masking --span_geo_prob 0.3 --span_max_length 5 \
                     --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
-                    --encoder transformer --mask fully_visible --layernorm_positioning pre\
-                    --remove_transformer_bias --decoder transformer \
+                    --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
                     --target t5 --tie_weights
 
 ```
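Note on the span-masking flags above: `--span_masking --span_geo_prob 0.3 --span_max_length 5` configure SpanBERT-style span corruption, where masked-span lengths are drawn from a geometric distribution truncated at a maximum length. Below is a minimal illustrative sketch of the distribution these flags parameterize; it is an assumption about intent, not UER-py's actual implementation.

```python
import random

def sample_span_length(geo_prob: float = 0.3, max_length: int = 5) -> int:
    """Draw a masked-span length from a geometric distribution truncated
    at max_length, illustrating what --span_geo_prob and --span_max_length
    control. Sketch only; not UER-py's code."""
    length = 1
    while length < max_length and random.random() >= geo_prob:
        length += 1
    return length

# Example: with geo_prob=0.3, most sampled spans are 1-3 tokens long.
print([sample_span_length() for _ in range(10)])
```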
@@ -77,12 +76,12 @@ Stage2:
 python3 preprocess.py --corpus_path corpora/cluecorpussmall.txt \
                       --vocab_path models/google_zh_with_sentinel_vocab.txt \
                       --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
-                      --
-                      --dynamic_masking
+                      --processes_num 32 --seq_length 512 \
+                      --dynamic_masking --target t5
 ```
 
 ```
-python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
+python3 pretrain.py --dataset_path cluecorpussmall_t5_seq512_dataset.pt \
                     --pretrained_model_path models/cluecorpussmall_t5_seq128_model.bin-1000000 \
                     --vocab_path models/google_zh_with_sentinel_vocab.txt \
                     --config_path models/t5/small_config.json \
@@ -92,8 +91,7 @@ python3 pretrain.py --dataset_path cluecorpussmall_t5_seq128_dataset.pt \
                     --learning_rate 1e-3 --batch_size 16 \
                     --span_masking --span_geo_prob 0.3 --span_max_length 5 \
                     --embedding word --relative_position_embedding --remove_embedding_layernorm --tgt_embedding word \
-                    --encoder transformer --mask fully_visible --layernorm_positioning pre\
-                    --remove_transformer_bias --decoder transformer \
+                    --encoder transformer --mask fully_visible --layernorm_positioning pre --decoder transformer \
                     --target t5 --tie_weights
 ```
 
@@ -110,6 +108,14 @@ python3 scripts/convert_t5_from_uer_to_huggingface.py --input_model_path cluecor
 ### BibTeX entry and citation info
 
 ```
+@article{2020t5,
+  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
+  author  = {Colin Raffel and Noam Shazeer and Adam Roberts and Katherine Lee and Sharan Narang and Michael Matena and Yanqi Zhou and Wei Li and Peter J. Liu},
+  journal = {Journal of Machine Learning Research},
+  pages   = {1-67},
+  year    = {2020}
+}
+
 @article{zhao2019uer,
   title={UER: An Open-Source Toolkit for Pre-training Models},
   author={Zhao, Zhe and Chen, Hui and Zhang, Jinbin and Zhao, Xin and Liu, Tao and Lu, Wei and Chen, Xi and Deng, Haotang and Ju, Qi and Du, Xiaoyong},
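The last hunk's context line shows the checkpoint being exported with `scripts/convert_t5_from_uer_to_huggingface.py`. Below is a minimal sketch of loading the converted weights with Hugging Face Transformers, assuming they were saved to a local directory named `cluecorpussmall_t5_small/`; the directory name and example input are hypothetical.

```python
from transformers import BertTokenizer, T5ForConditionalGeneration, Text2TextGenerationPipeline

# Load the converted checkpoint; the directory name is a placeholder.
tokenizer = BertTokenizer.from_pretrained("cluecorpussmall_t5_small/")
model = T5ForConditionalGeneration.from_pretrained("cluecorpussmall_t5_small/")

# The sentinel vocabulary (google_zh_with_sentinel_vocab.txt) marks masked
# spans with extra0, extra1, ... tokens, which the model is trained to fill.
text2text = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)

# Example input: "中国的首都是extra0京" ("The capital of China is extra0-jing").
print(text2text("中国的首都是extra0京", max_length=50))
```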