#!/bin/bash

# In this example, we show how to train SimCSE on unsupervised Wikipedia data.
# If you want to train it with multiple GPU cards, see "run_sup_example.sh"
# about how to use PyTorch's distributed data parallel.

python train.py \
    --model_name_or_path $BC_ROBERTA_LARGE_PATH \
    --train_file $BC_WIKI1M_PATH \
    --output_dir result/simcse-roberta-large-with-mask \
    --num_train_epochs 1 \
    --per_device_train_batch_size 64 \
    --learning_rate 5e-6 \
    --max_seq_length 32 \
    --pooler_type cls \
    --mlp_only_train \
    --overwrite_output_dir \
    --temp 0.05 \
    --do_train \
    --fp16 \
    --do_mlm \
    --save_steps 5000 \
    # --load_best_model_at_end \
    # --eval_steps 125 \
    # --evaluation_strategy steps \
    # --metric_for_best_model stsb_spearman \
    # --do_eval \
    # "$@"
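
# Multi-GPU variant (a minimal sketch, kept commented out so this script stays
# single-GPU by default): the same train.py entry point can be launched with
# PyTorch's distributed launcher. NUM_GPU below is an assumed example value,
# and the remaining flags mirror the single-GPU command above; see
# "run_sup_example.sh" for the full recipe.
#
# NUM_GPU=4
# python -m torch.distributed.launch --nproc_per_node $NUM_GPU train.py \
#     --model_name_or_path $BC_ROBERTA_LARGE_PATH \
#     --train_file $BC_WIKI1M_PATH \
#     --output_dir result/simcse-roberta-large-with-mask \
#     ...  # remaining arguments identical to the single-GPU command above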