{ "report_every": 100, "vocab_size_multiple": 8, "save_data": "/media/vincent/Crucial X6/NMT_work/en-de/runs/6-6-16-1024-4096/", "share_vocab": true, "src_words_min_frequency": 1, "transforms": [ "onmt_tokenize" ], "scoring_debug": true, "skip_empty_level": "silent", "src_vocab": "/media/vincent/Crucial X6/NMT_work/en-de/params/ende.vocab2", "log_file": "/media/vincent/Crucial X6/NMT_work/en-de/runs/6-6-16-1024-4096/6-6-16-1024-4096-hfstreaming.log", "valid_metrics": [ "BLEU" ], "seed": 1234, "dump_preds": "/media/vincent/Crucial X6/NMT_work/en-de/runs/6-6-16-1024-4096/", "training": { "max_grad_norm": 1.0, "world_size": 1, "normalization": "tokens", "bucket_size": 10000, "gpu_ranks": [ 0 ], "freeze_encoder": false, "valid_steps": 5000, "num_workers": 0, "reset_optim": "all", "average_decay": 0.0005, "save_checkpoint_steps": 5000, "batch_size": 12144, "estim_loss_lambda_steps": [ 0, 10000, 20000, 50000, 100000, 150000, 160000, 170000 ], "attention_dropout": [ 0.0, 0.0, 0.0, 0.0 ], "batch_type": "tokens", "valid_batch_size": 8192, "prefetch_factor": 400, "estim_loss_lambda": [ 1.0, 2.0, 5.0, 10.0, 100.0, 1000.0, 5000.0, 10000.0 ], "keep_checkpoint": 50, "learning_rate": 1.0, "adam_beta2": 0.998, "accum_steps": [ 0, 15000, 30000 ], "optim": "adamw", "dropout": [ 0.1, 0.1, 0.1, 0.0 ], "dropout_steps": [ 0, 15000, 30000, 95000 ], "accum_count": [ 6, 6, 6 ], "param_init_method": "xavier_uniform", "freeze_decoder": false, "model_path": "/media/vincent/Crucial X6/NMT_work/en-de/runs/6-6-16-1024-4096/6-6-16-1024-4096-hfstreaming", "torch_compile": false, "apex_opt_level": "", "decay_method": "noam", "score_threshold": 0.65, "compute_dtype": "torch.float16", "label_smoothing": 0.1, "train_steps": 50000, "warmup_steps": 6000, "batch_size_multiple": 1 }, "data": { "synth-mbr-decoded-sentlevel": { "weight": 12, "path_tgt": "hf://eole-nlp/synth-mbr-decoded-sentlevel/de", "path_align": null, "transforms": [ "onmt_tokenize", "filtertoolong" ], "path_sco": 
"hf://eole-nlp/synth-mbr-decoded-sentlevel/sco", "path_src": "hf://eole-nlp/synth-mbr-decoded-sentlevel/en" }, "synth-greedy-decoded-sentlevel": { "weight": 10, "path_tgt": "hf://eole-nlp/synth-greedy-decoded-sentlevel/de", "path_align": null, "transforms": [ "onmt_tokenize", "filtertoolong" ], "path_sco": "hf://eole-nlp/synth-greedy-decoded-sentlevel/sco", "path_src": "hf://eole-nlp/synth-greedy-decoded-sentlevel/en" }, "synth-qe-reranked-doclevel": { "weight": 1, "path_tgt": "hf://eole-nlp/synth-qe-reranked-doclevel/de", "path_align": null, "transforms": [ "onmt_tokenize", "filtertoolong" ], "path_sco": "hf://eole-nlp/synth-qe-reranked-doclevel/sco", "path_src": "hf://eole-nlp/synth-qe-reranked-doclevel/en" }, "europarl-v10.de-en": { "weight": 2, "path_tgt": "hf://eole-nlp/europarl-v10.de-en/de", "path_align": null, "transforms": [ "onmt_tokenize", "filtertoolong" ], "path_sco": "hf://eole-nlp/europarl-v10.de-en/sco", "path_src": "hf://eole-nlp/europarl-v10.de-en/en" }, "valid": { "path_align": null, "transforms": [ "onmt_tokenize" ], "path_src": "/mnt/InternalCrucial4/data/en-de/testsets/newstest2023-src.en", "path_tgt": "/mnt/InternalCrucial4/data/en-de/testsets/newstest2023-ref.de" } }, "transforms_configs": { "filtertoolong": { "tgt_seq_length": 1024, "src_seq_length": 1024 }, "onmt_tokenize": { "src_subword_type": "bpe", "src_subword_model": "${MODEL_PATH}/subwords.en_de.bpe", "tgt_subword_type": "bpe", "tgt_subword_model": "${MODEL_PATH}/subwords.en_de.bpe", "src_onmttok_kwargs": { "mode": "aggressive", "joiner_annotate": true, "preserve_placeholders": true, "case_markup": true, "soft_case_regions": true, "preserve_segmented_tokens": true, "segment_case": true, "segment_numbers": true, "segment_alphabet_change": true }, "tgt_onmttok_kwargs": { "mode": "aggressive", "joiner_annotate": true, "preserve_placeholders": true, "case_markup": true, "soft_case_regions": true, "preserve_segmented_tokens": true, "segment_case": true, "segment_numbers": 
true, "segment_alphabet_change": true } } }, "model": { "add_estimator": false, "share_decoder_embeddings": true, "architecture": "transformer", "heads": 16, "add_ffnbias": true, "mlp_activation_fn": "gated-silu", "add_qkvbias": false, "norm_eps": 1e-06, "position_encoding_type": "Rotary", "hidden_size": 1024, "layers": 6, "layer_norm": "standard", "transformer_ff": 4096, "share_embeddings": true, "rope_config": { "rotary_interleave": false }, "embeddings": { "src_word_vec_size": 1024, "tgt_word_vec_size": 1024, "freeze_word_vecs_dec": false, "position_encoding_type": "Rotary", "word_vec_size": 1024 }, "decoder": { "norm_eps": 1e-06, "position_encoding_type": "Rotary", "layers": 6, "hidden_size": 1024, "heads": 16, "n_positions": null, "tgt_word_vec_size": 1024, "add_ffnbias": true, "layer_norm": "standard", "mlp_activation_fn": "gated-silu", "decoder_type": "transformer", "transformer_ff": 4096, "add_qkvbias": false, "rope_config": { "rotary_interleave": false } }, "encoder": { "norm_eps": 1e-06, "position_encoding_type": "Rotary", "layers": 6, "hidden_size": 1024, "heads": 16, "n_positions": null, "src_word_vec_size": 1024, "add_ffnbias": true, "mlp_activation_fn": "gated-silu", "transformer_ff": 4096, "layer_norm": "standard", "add_qkvbias": false, "encoder_type": "transformer", "rope_config": { "rotary_interleave": false } } } }