speechbrain
/

slu-timers-and-such-direct-librispeech-asr

speechbrain

English

Spoken language understanding

Model card Files and versions Community

lorenlugosch committed on Apr 4, 2021

Commit

ad478b7

1 Parent(s): 8fa5d45

hparams with no generated pointers

Browse files

Files changed (1)

hyperparams.yaml +87 -124

hyperparams.yaml CHANGED Viewed

@@ -1,6 +1,3 @@
-# Generated 2021-03-21 from:
-# /home/mila/l/lugoschl/code/fork/speechbrain/recipes/timers-and-such/direct/hparams/train.yaml
-# yamllint disable
 # ############################################################################
 # Model: Direct SLU
 # Encoder: Pre-trained ASR encoder -> LSTM
@@ -11,36 +8,7 @@
 # Authors:  Loren Lugosch, Mirco Ravanelli 2020
 # ############################################################################
-# Seed needs to be set at top of yaml, before objects with parameters are made
-seed: 4
-__set_seed: !apply:torch.manual_seed [4]
-experiment: train-real-only
-output_folder: results/train-real-only/4
-save_folder: results/train-real-only/4/save
-train_log: results/train-real-only/4/train_log.txt
-# Data files
-data_folder: /localscratch/timers-and-such/
-                          # e.g, /localscratch/timers-and-such
-data_folder_rirs: /localscratch/timers-and-such/
-train_splits: [train-real]
-csv_train: results/train-real-only/4/train-type=direct.csv
-csv_dev_real: results/train-real-only/4/dev-real-type=direct.csv
-csv_dev_synth: results/train-real-only/4/dev-synth-type=direct.csv
-csv_test_real: results/train-real-only/4/test-real-type=direct.csv
-csv_test_synth: results/train-real-only/4/test-synth-type=direct.csv
-csv_all_real: results/train-real-only/4/all-real-type=direct.csv
-tokenizer_file: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model
-skip_prep: false
-ckpt_interval_minutes: 15 # save checkpoint every N min
-test_on_all_real: false
-# Training parameters
-number_of_epochs: 50
-batch_size: 16
-lr: 0.0003
 token_type: unigram # ["unigram", "bpe", "char"]
-sorting: random
 # Model parameters
 sample_rate: 16000
@@ -59,119 +27,114 @@ slu_beam_size: 80
 eos_threshold: 1.5
 temperature: 1.25
-dataloader_opts:
-  batch_size: 16
-  shuffle: true
-epoch_counter: &id009 !new:speechbrain.utils.epoch_loop.EpochCounter
-  limit: 50
 # Models
 asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
-  source: speechbrain/asr-crdnn-rnnlm-librispeech
-  run_opts: {device: cuda:0}
-slu_enc: &id001 !new:speechbrain.nnet.containers.Sequential
-  input_shape: [null, null, 512]
-  lstm: !new:speechbrain.nnet.RNN.LSTM
-    input_size: 512
-    bidirectional: true
-    hidden_size: 256
-    num_layers: 2
-  linear: !new:speechbrain.nnet.linear.Linear
-    input_size: 512
-    n_neurons: 256
-output_emb: &id002 !new:speechbrain.nnet.embedding.Embedding
-  num_embeddings: 51
-  embedding_dim: 128
-dec: &id003 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
-  enc_dim: 256
-  input_size: 128
-  rnn_type: gru
-  attn_type: keyvalue
-  hidden_size: 512
-  attn_dim: 512
-  num_layers: 3
-  scaling: 1.0
-  dropout: 0.0
-seq_lin: &id004 !new:speechbrain.nnet.linear.Linear
-  input_size: 512
-  n_neurons: 51
-env_corrupt: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
-  openrir_folder: /localscratch/timers-and-such/
-  babble_prob: 0.0
-  reverb_prob: 0.0
-  noise_prob: 1.0
-  noise_snr_low: 0
-  noise_snr_high: 15
 modules:
-  slu_enc: *id001
-  output_emb: *id002
-  dec: *id003
-  seq_lin: *id004
-  env_corrupt: *id005
-model: &id007 !new:torch.nn.ModuleList
-- [*id001, *id002, *id003, *id004]
-tokenizer: &id006 !new:sentencepiece.SentencePieceProcessor
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
-  collect_in: results/train-real-only/4/save/TAS_tokenizer
-  loadables:
-    tokenizer: *id006
-  paths:
-    tokenizer: /home/mila/l/lugoschl/code/speechbrain/recipes/timers-and-such/Tokenizer/results/tokenizer_bpe51/51_unigram.model
 beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
-  embedding: *id002
-  decoder: *id003
-  linear: *id004
-  bos_index: 0
-  eos_index: 0
-  min_decode_ratio: 0.0
-  max_decode_ratio: 10.0
-  beam_size: 80
-  eos_threshold: 1.5
-  temperature: 1.25
-  using_max_attn_shift: false
-  max_attn_shift: 30
-  coverage_penalty: 0.
 opt_class: !name:torch.optim.Adam
-  lr: 0.0003
-lr_annealing: &id008 !new:speechbrain.nnet.schedulers.NewBobScheduler
-  initial_value: 0.0003
-  improvement_threshold: 0.0025
-  annealing_factor: 0.8
-  patient: 0
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-  checkpoints_dir: results/train-real-only/4/save
-  recoverables:
-    model: *id007
-    scheduler: *id008
-    counter: *id009
 augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-  sample_rate: 16000
-  speeds: [95, 100, 105]
 log_softmax: !new:speechbrain.nnet.activations.Softmax
-  apply_log: true
 seq_cost: !name:speechbrain.nnet.losses.nll_loss
-  label_smoothing: 0.1
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-  save_file: results/train-real-only/4/train_log.txt
 error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
 cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
-  split_tokens: true

 # ############################################################################
 # Model: Direct SLU
 # Encoder: Pre-trained ASR encoder -> LSTM
 # Authors:  Loren Lugosch, Mirco Ravanelli 2020
 # ############################################################################
 token_type: unigram # ["unigram", "bpe", "char"]
 # Model parameters
 sample_rate: 16000
 eos_threshold: 1.5
 temperature: 1.25
 # Models
 asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
+    source: speechbrain/asr-crdnn-rnnlm-librispeech
+    run_opts: {"device":"cuda:0"}
+slu_enc: !new:speechbrain.nnet.containers.Sequential
+    input_shape: [null, null, !ref <ASR_encoder_dim>]
+    lstm: !new:speechbrain.nnet.RNN.LSTM
+        input_size: !ref <ASR_encoder_dim>
+        bidirectional: True
+        hidden_size: !ref <encoder_dim>
+        num_layers: 2
+    linear: !new:speechbrain.nnet.linear.Linear
+        input_size: !ref <encoder_dim> * 2
+        n_neurons: !ref <encoder_dim>
+output_emb: !new:speechbrain.nnet.embedding.Embedding
+    num_embeddings: !ref <output_neurons>
+    embedding_dim: !ref <emb_size>
+dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
+    enc_dim: !ref <encoder_dim>
+    input_size: !ref <emb_size>
+    rnn_type: gru
+    attn_type: keyvalue
+    hidden_size: !ref <dec_neurons>
+    attn_dim: 512
+    num_layers: 3
+    scaling: 1.0
+    dropout: 0.0
+seq_lin: !new:speechbrain.nnet.linear.Linear
+    input_size: !ref <dec_neurons>
+    n_neurons: !ref <output_neurons>
+env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
+    openrir_folder: !ref <data_folder_rirs>
+    babble_prob: 0.0
+    reverb_prob: 0.0
+    noise_prob: 1.0
+    noise_snr_low: 0
+    noise_snr_high: 15
 modules:
+    slu_enc: !ref <slu_enc>
+    output_emb: !ref <output_emb>
+    dec: !ref <dec>
+    seq_lin: !ref <seq_lin>
+    env_corrupt: !ref <env_corrupt>
+model: !new:torch.nn.ModuleList
+    - [!ref <slu_enc>, !ref <output_emb>,
+       !ref <dec>, !ref <seq_lin>]
+tokenizer: !new:sentencepiece.SentencePieceProcessor
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    collect_in: !ref <save_folder>/TAS_tokenizer
+    loadables:
+        tokenizer: !ref <tokenizer>
+    paths:
+        tokenizer: !ref <tokenizer_file>
 beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
+    embedding: !ref <output_emb>
+    decoder: !ref <dec>
+    linear: !ref <seq_lin>
+    bos_index: !ref <bos_index>
+    eos_index: !ref <eos_index>
+    min_decode_ratio: !ref <min_decode_ratio>
+    max_decode_ratio: !ref <max_decode_ratio>
+    beam_size: !ref <slu_beam_size>
+    eos_threshold: !ref <eos_threshold>
+    temperature: !ref <temperature>
+    using_max_attn_shift: False
+    max_attn_shift: 30
+    coverage_penalty: 0.
 opt_class: !name:torch.optim.Adam
+    lr: !ref <lr>
+lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
+    initial_value: !ref <lr>
+    improvement_threshold: 0.0025
+    annealing_factor: 0.8
+    patient: 0
 checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
+    checkpoints_dir: !ref <save_folder>
+    recoverables:
+        model: !ref <model>
+        scheduler: !ref <lr_annealing>
+        counter: !ref <epoch_counter>
 augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
+    sample_rate: !ref <sample_rate>
+    speeds: [95, 100, 105]
 log_softmax: !new:speechbrain.nnet.activations.Softmax
+    apply_log: True
 seq_cost: !name:speechbrain.nnet.losses.nll_loss
+    label_smoothing: 0.1
 train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
+    save_file: !ref <train_log>
 error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
 cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
+    split_tokens: True