# In this example, we show how to train SimCSE on unsupervised Wikipedia data.
# If you want to train it with multiple GPU cards, see "run_sup_example.sh"
# for how to use PyTorch's distributed data parallel (a commented sketch of
# such a launch is also included at the bottom of this file).
python train.py \
    --model_name_or_path $BC_ROBERTA_LARGE_PATH \
    --train_file $BC_WIKI1M_PATH \
    --output_dir result/simcse-roberta-large-with-mask \
    --num_train_epochs 1 \
    --per_device_train_batch_size 64 \
    --learning_rate 5e-6 \
    --max_seq_length 32 \
    --pooler_type cls \
    --mlp_only_train \
    --overwrite_output_dir \
    --temp 0.05 \
    --do_train \
    --fp16 \
    --do_mlm \
    --save_steps 5000
    # Optional: uncomment the flags below to evaluate on the STS-B dev set
    # during training and keep the checkpoint with the best Spearman
    # correlation; uncomment "$@" to forward extra command-line arguments
    # to train.py. If you uncomment any of them, re-add the trailing
    # backslash after "--save_steps 5000" above.
    # --load_best_model_at_end \
    # --eval_steps 125 \
    # --evaluation_strategy steps \
    # --metric_for_best_model stsb_spearman \
    # --do_eval \
    # "$@"